change d2d to d2h & h2d

This commit is contained in:
shin0403 2024-04-24 11:11:50 +09:00
parent ea26d69751
commit 80c81fc77d
2 changed files with 1437 additions and 1438 deletions

View file

@ -6,22 +6,19 @@
#ifndef _CL_HELPER_
#define _CL_HELPER_
#include <CL/cl.h>
#include <vector>
#include <iostream>
#include <fstream>
#include <string>
#include "util.h"
#ifdef TIMING
#include "timing.h"
#endif
#include "util.h"
#include <CL/cl.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using std::string;
using std::ifstream;
using std::cerr;
using std::cout;
using std::endl;
//using std::cout;
using std::ifstream;
using std::string;
//#define PROFILE_
#ifdef PROFILE_
@ -132,29 +129,6 @@ int work_group_size = BLOCK_SIZE_0;
int device_id_inused = 0; //deviced id used (default : 0)
int number_devices = 0;
int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
/*------------------------------------------------------------
@function: select device to use
@params:
@ -162,7 +136,8 @@ int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
@return: NULL
@date: 24/03/2011
------------------------------------------------------------*/
void _clSetDevice(int idx) throw(string){
void _clSetDevice(int idx) throw(string)
{
cl_int resultCL;
oclHandles.context = NULL;
@ -200,7 +175,6 @@ void _clSetDevice(int idx) throw(string){
if (idx > (deviceListSize - 1))
throw(string(":invalid device ID:"));
device_id_inused = idx;
}
/*------------------------------------------------------------
@ -211,7 +185,8 @@ void _clSetDevice(int idx) throw(string){
@return: prop
@date: 24/03/2011
------------------------------------------------------------*/
void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
void _clGetDeviceProperties(int idx, _clDeviceProp* prop) throw(string)
{
oclHandles.cl_status = clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
@ -234,10 +209,33 @@ void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
#endif
}
int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
{
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
/*
* Converts the contents of a file into a string
*/
string FileToString(const string fileName){
string FileToString(const string fileName)
{
ifstream f(fileName.c_str(), ifstream::in | ifstream::binary);
try {
@ -252,7 +250,8 @@ string FileToString(const string fileName){
f.seekg(0, ifstream::beg);
str = new char[size + 1];
if (!str) throw(string("Could not allocate memory"));
if (!str)
throw(string("Could not allocate memory"));
f.read(str, fileSize);
f.close();
@ -262,13 +261,11 @@ string FileToString(const string fileName){
delete[] str;
return s;
}
}
catch(std::string msg){
} catch (std::string msg) {
cerr << "Exception caught in FileToString(): " << msg << endl;
if (f.is_open())
f.close();
}
catch(...){
} catch (...) {
cerr << "Exception caught in FileToString()" << endl;
if (f.is_open())
f.close();
@ -287,14 +284,14 @@ string FileToString(const string fileName){
char device_type[3];
int device_id = 0;
int platform_id = 0;
void _clCmdParams(int argc, char* argv[]){
void _clCmdParams(int argc, char* argv[])
{
for (int i = 0; i < argc; ++i) {
switch (argv[i][1]) {
case 't': //--t stands for device type
if (++i < argc) {
sscanf(argv[i], "%s", device_type);
}
else{
} else {
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
throw;
}
@ -302,8 +299,7 @@ void _clCmdParams(int argc, char* argv[]){
case 'd': //--d stands for device id
if (++i < argc) {
sscanf(argv[i], "%d", &device_id);
}
else{
} else {
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
throw;
}
@ -311,14 +307,12 @@ void _clCmdParams(int argc, char* argv[]){
case 'p': //--p stands for platform id
if (++i < argc) {
sscanf(argv[i], "%d", &platform_id);
}
else{
} else {
std::cerr << "Could not read argument after option " << argv[i - 1] << std::endl;
throw;
}
break;
default:
;
default:;
}
}
}
@ -340,7 +334,8 @@ void _clCmdParams(int argc, char* argv[]){
get the number of devices and devices have no relationship with context
@date: 24/03/2011
------------------------------------------------------------*/
void _clInit(string device_type, int device_id)throw(string){
void _clInit(string device_type, int device_id) throw(string)
{
#ifdef TIMING
gettimeofday(&tv_total_start, NULL);
#endif
@ -380,7 +375,9 @@ void _clInit(string device_type, int device_id)throw(string){
if (resultCL != CL_SUCCESS)
throw(string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
//printf("number of platforms:%d\n",numPlatforms); //by cambine
printf("--cambine: number of platforms: %d\n",numPlatforms);
#ifdef DEV_INFO
//std::cout << "--cambine: number of platforms: " << numPlatforms << std::endl;
#endif
if (!(numPlatforms > 0))
throw(string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
@ -393,8 +390,7 @@ void _clInit(string device_type, int device_id)throw(string){
// Select the target platform. Default: first platform
targetPlatform = allPlatforms[platform_id];
for (int i = 0; i < numPlatforms; i++)
{
for (int i = 0; i < numPlatforms; i++) {
char pbuff[128];
resultCL = clGetPlatformInfo(allPlatforms[i],
CL_PLATFORM_VENDOR,
@ -408,7 +404,6 @@ void _clInit(string device_type, int device_id)throw(string){
#ifdef DEV_INFO
//std::cout << "--cambine: vedor is: " << pbuff << std::endl;
#endif
}
free(allPlatforms);
//-----------------------------------------------
@ -433,8 +428,7 @@ void _clInit(string device_type, int device_id)throw(string){
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
}
}
}
else{
} else {
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
if (oclHandles.cl_status != CL_SUCCESS) {
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
@ -444,9 +438,9 @@ void _clInit(string device_type, int device_id)throw(string){
if (deviceListSize == 0)
throw(string("InitCL()::Error: No devices found."));
//#ifdef DEV_INFO
printf("--cambine: number of device\n"); //<<deviceListSize<<std::endl;
//#endif
#ifdef DEV_INFO
//std::cout << "--cambine: number of device=" << deviceListSize << std::endl;
#endif
number_devices = deviceListSize;
// Now, allocate the device list
// oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
@ -475,8 +469,7 @@ void _clInit(string device_type, int device_id)throw(string){
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
}
}
}
else{
} else {
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
if (oclHandles.cl_status != CL_SUCCESS) {
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
@ -491,9 +484,9 @@ void _clInit(string device_type, int device_id)throw(string){
_clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
//std::cout << "--cambine: device name=" << prop.device_name << std::endl;
//#ifdef DEV_INFO
printf("--cambine: return device list successfully!\n");
//#endif
#ifdef DEV_INFO
//std::cout << "--cambine: return device list successfully!" << std::endl;
#endif
//-----------------------------------------------
//--cambine-3: create an OpenCL context
@ -510,11 +503,12 @@ void _clInit(string device_type, int device_id)throw(string){
if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
throw(string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
printf("--cambine: create OCL context successfully!\n");
#ifdef DEV_INFO
//std::cout << "--cambine: create OCL context successfully!" << std::endl;
#endif
//-----------------------------------------------
//--cambine-4: Create an OpenCL command queue
/*
#ifdef TIMING
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED],
@ -523,26 +517,16 @@ void _clInit(string device_type, int device_id)throw(string){
#else
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED],
0,
CL_QUEUE_PROFILING_ENABLE,
&resultCL);
#endif
printf("???\n");*/
#ifdef TIMING
oclHandles.queue = clCreateCommandQueueWithProperties(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED], CL_QUEUE_PROFILING_ENABLE,
&resultCL);
#else
oclHandles.queue = clCreateCommandQueueWithProperties(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL);
#endif
if (oclHandles.queue == NULL)
if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
//#ifdef PROFILE_
// double t2 = gettime();
// CC += t2 - t1;
//#endif
#ifdef PROFILE_
double t2 = gettime();
CC += t2 - t1;
#endif
//-----------------------------------------------
//--cambine-5: Load CL file, build CL program object, create CL kernel object
/*
@ -566,10 +550,9 @@ void _clInit(string device_type, int device_id)throw(string){
std::abort();
oclHandles.program = clCreateProgramWithBinary(
oclHandles.context, 1, &oclHandles.devices[device_id_inused], &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &resultCL);
oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &resultCL);
free(kernel_bin);
//insert debug information
std::string options = "";
//options += " -cl-nv-opt-level=3";
@ -606,8 +589,6 @@ void _clInit(string device_type, int device_id)throw(string){
throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
}
printf("build program");
#ifdef PROFILE_
double t3 = gettime();
KC += t3 - t2;
@ -644,15 +625,13 @@ void _clInit(string device_type, int device_id)throw(string){
free(binaries[i]);
#endif
for (int nKernel = 0; nKernel < total_kernels; nKernel++)
{
for (int nKernel = 0; nKernel < total_kernels; nKernel++) {
// get a kernel object handle for a kernel with the given name
cl_kernel kernel = clCreateKernel(oclHandles.program,
(kernel_names[nKernel]).c_str(),
&resultCL);
if ((resultCL != CL_SUCCESS) || (kernel == NULL))
{
if ((resultCL != CL_SUCCESS) || (kernel == NULL)) {
string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
throw(errorMsg);
}
@ -729,8 +708,7 @@ void _clRelease()
if (oclHandles.queue != NULL) {
cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
if (resultCL != CL_SUCCESS)
{
if (resultCL != CL_SUCCESS) {
cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
errorFlag = true;
}
@ -748,7 +726,8 @@ void _clRelease()
oclHandles.context = NULL;
}
if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
if (errorFlag)
throw(string("ReleaseCL()::Error encountered."));
#ifdef PROFILE_
double t2 = gettime();
CR += t2 - t1;
@ -767,7 +746,8 @@ void _clRelease()
@return: mem_d
@date: 24/03/2011
------------------------------------------------------------*/
cl_mem _clMalloc(int size) throw(string){
cl_mem _clMalloc(int size) throw(string)
{
#ifdef TIMING
gettimeofday(&tv_mem_alloc_start, NULL);
#endif
@ -827,7 +807,8 @@ cl_mem _clMalloc(int size) throw(string){
@date: 06/04/2011
------------------------------------------------------------*/
void* _clMallocHost(int size)throw(string){
void* _clMallocHost(int size) throw(string)
{
void* mem_h;
oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
#ifdef ERRMSG
@ -907,7 +888,8 @@ void* _clMallocHost(int size)throw(string){
@return: NULL
@date: 06/04/2011
------------------------------------------------------------*/
void _clFreeHost(int io, void * mem_h){
void _clFreeHost(int io, void* mem_h)
{
if (io == 0) { //in
if (mem_h) {
oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
@ -941,8 +923,7 @@ void _clFreeHost(int io, void * mem_h){
}
#endif
}
}
else if(io==1){ //out
} else if (io == 1) { //out
if (mem_h) {
oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
#ifdef ERRMSG
@ -975,8 +956,7 @@ void _clFreeHost(int io, void * mem_h){
}
#endif
}
}
else
} else
throw(string("encounter invalid choice when freeing pinned memmory"));
}
/*------------------------------------------------------------
@ -988,7 +968,8 @@ void _clFreeHost(int io, void * mem_h){
@return: NULL
@date: 17/01/2011
------------------------------------------------------------*/
void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
void _clMemcpyH2D(cl_mem dst, const void* src, int size) throw(string)
{
#ifdef PROFILE_
double t1 = gettime();
#endif
@ -1044,7 +1025,8 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
@return: NULL
@date: 17/01/2011
------------------------------------------------------------*/
void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
void _clMemcpyD2H(void* dst, cl_mem src, int size) throw(string)
{
#ifdef PROFILE_
double t1 = gettime();
#endif
@ -1099,7 +1081,8 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
@return: NULL
@date: 27/03/2011
------------------------------------------------------------*/
void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string)
{
#ifdef PROFILE_
double t1 = gettime();
#endif
@ -1165,7 +1148,8 @@ void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
@return: NULL
@date: 03/04/2011
------------------------------------------------------------*/
void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
void _clSetArgs(int kernel_id, int arg_idx, void* d_mem, int size = 0) throw(string)
{
if (!size) {
oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
#ifdef ERRMSG
@ -1202,8 +1186,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
if (oclHandles.cl_status != CL_SUCCESS)
throw(oclHandles.error_str);
#endif
}
else{
} else {
oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
#ifdef ERRMSG
oclHandles.error_str = "excpetion in _clSetKernelArg()-2 ";
@ -1241,7 +1224,8 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
#endif
}
}
void _clFinish() throw(string){
void _clFinish() throw(string)
{
oclHandles.cl_status = clFinish(oclHandles.queue);
#ifdef ERRMSG
if (oclHandles.cl_status != CL_SUCCESS) {
@ -1273,7 +1257,8 @@ void _clFinish() throw(string){
@return: NULL
@date: 03/04/2011
------------------------------------------------------------*/
void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string)
{
#ifdef PROFILE_
double t1 = gettime();
#endif
@ -1283,7 +1268,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
work_items = work_items + (work_group_size - (work_items % work_group_size));
size_t local_work_size[] = { work_group_size, 1 };
size_t global_work_size[] = { work_items, 1 };
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
global_work_size, local_work_size, 0, 0, &(e[0]));
#ifdef ERRMSG
if (oclHandles.cl_status != CL_SUCCESS) {
@ -1389,7 +1374,8 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
@date: 03/04/2011
------------------------------------------------------------*/
void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
void _clMemset(cl_mem mem_d, short val, int number_bytes) throw(string)
{
int kernel_id = 0;
int arg_idx = 0;
_clSetArgs(kernel_id, arg_idx++, mem_d);
@ -1408,7 +1394,8 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
@return: NULL
@date: 03/04/2011
------------------------------------------------------------*/
void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string)
{
#ifdef PROFILE_
double t1 = gettime();
#endif
@ -1418,7 +1405,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
cl_event e[1];
/*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
work_items = work_items + (work_group_size-(work_items%work_group_size));*/
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
global_work_size, local_work_size, 0, 0, &(e[0]));
#ifdef ERRMSG
if (oclHandles.cl_status != CL_SUCCESS) {
@ -1496,7 +1483,8 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
@date: 03/04/2011
------------------------------------------------------------*/
void _clFree(cl_mem ob) throw(string){
void _clFree(cl_mem ob) throw(string)
{
#ifdef TIMING
gettimeofday(&tv_close_start, NULL);
#endif
@ -1543,7 +1531,8 @@ void _clFree(cl_mem ob) throw(string){
@return: NULL
@date: 03/04/2011
------------------------------------------------------------*/
void _clStatistics(){
void _clStatistics()
{
#ifdef PROFILE_
FILE* fp_pd = fopen("PD_OCL.txt", "a");
fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
@ -1552,7 +1541,8 @@ void _clStatistics(){
return;
}
void _clPrintTiming(){
void _clPrintTiming()
{
#ifdef TIMING
gettimeofday(&tv_total_end, NULL);
tvsub(&tv_total_end, &tv_total_start, &tv);
@ -1567,6 +1557,5 @@ void _clPrintTiming(){
printf("Close: %f\n", close_time);
printf("Total: %f\n", total_time);
#endif
}
#endif //_CL_HELPER_

View file

@ -12,10 +12,10 @@
on 24/03/2011
********************************************************************/
#include <iostream>
#include <fstream>
#include <math.h>
#include "CLHelper.h"
#include <fstream>
#include <iostream>
#include <math.h>
/*
* Options
@ -38,12 +38,10 @@
* not options
*/
#if block_length > 128
#warning "the kernels may fail too launch on some systems if the block length is too large"
#endif
#define VAR_DENSITY 0
#define VAR_MOMENTUM 1
#define VAR_DENSITY_ENERGY (VAR_MOMENTUM + NDIM)
@ -60,47 +58,52 @@ typedef struct{
* Generic functions
*/
template <typename T>
cl_mem alloc(int N){
cl_mem alloc(int N)
{
cl_mem mem_d = _clMalloc(sizeof(T) * N);
return mem_d;
}
template <typename T>
void dealloc(cl_mem array){
void dealloc(cl_mem array)
{
_clFree(array);
}
template <typename T>
void copy(cl_mem dst, cl_mem src, int N){
void copy(cl_mem dst, cl_mem src, int N)
{
_clMemcpyD2D(dst, src, N * sizeof(T));
}
template <typename T>
void upload(cl_mem dst, T* src, int N){
void upload(cl_mem dst, T* src, int N)
{
_clMemcpyH2D(dst, src, N * sizeof(T));
}
template <typename T>
void download(T* dst, cl_mem src, int N){
void download(T* dst, cl_mem src, int N)
{
_clMemcpyD2H(dst, src, N * sizeof(T));
}
void dump(cl_mem variables, int nel, int nelr){
void dump(cl_mem variables, int nel, int nelr)
{
float* h_variables = new float[nelr * NVAR];
download(h_variables, variables, nelr * NVAR);
{
std::ofstream file("density.txt");
file << nel << " " << nelr << std::endl;
for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY*nelr] << std::endl;
for (int i = 0; i < nel; i++)
file << h_variables[i + VAR_DENSITY * nelr] << std::endl;
}
{
std::ofstream file("momentum.txt");
file << nel << " " << nelr << std::endl;
for(int i = 0; i < nel; i++)
{
for (int i = 0; i < nel; i++) {
for (int j = 0; j != NDIM; j++)
file << h_variables[i + (VAR_MOMENTUM + j) * nelr] << " ";
file << std::endl;
@ -110,12 +113,14 @@ void dump(cl_mem variables, int nel, int nelr){
{
std::ofstream file("density_energy.txt");
file << nel << " " << nelr << std::endl;
for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY_ENERGY*nelr] << std::endl;
for (int i = 0; i < nel; i++)
file << h_variables[i + VAR_DENSITY_ENERGY * nelr] << std::endl;
}
delete[] h_variables;
}
void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string){
void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string)
{
int work_items = nelr;
int work_group_size = BLOCK_SIZE_1;
@ -127,7 +132,8 @@ void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(
_clInvokeKernel(kernel_id, work_items, work_group_size);
}
void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors){
void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors)
{
int work_items = nelr;
int work_group_size = BLOCK_SIZE_2;
@ -140,11 +146,12 @@ void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_f
_clInvokeKernel(kernel_id, work_items, work_group_size);
}
void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, \
void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable,
cl_mem fluxes, cl_mem ff_flux_contribution_density_energy,
cl_mem ff_flux_contribution_momentum_x,
cl_mem ff_flux_contribution_momentum_y,
cl_mem ff_flux_contribution_momentum_z){
cl_mem ff_flux_contribution_momentum_z)
{
int work_items = nelr;
int work_group_size = BLOCK_SIZE_3;
@ -163,7 +170,8 @@ void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals
_clInvokeKernel(kernel_id, work_items, work_group_size);
}
void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes){
void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes)
{
int work_items = nelr;
int work_group_size = BLOCK_SIZE_4;
@ -184,7 +192,6 @@ inline void compute_flux_contribution(float& density, float3& momentum, float& d
fc_momentum_x.y = velocity.x * momentum.y;
fc_momentum_x.z = velocity.x * momentum.z;
fc_momentum_y.x = fc_momentum_x.y;
fc_momentum_y.y = velocity.y * momentum.y + pressure;
fc_momentum_y.z = velocity.y * momentum.z;
@ -202,11 +209,13 @@ inline void compute_flux_contribution(float& density, float3& momentum, float& d
/*
* Main function
*/
int main(int argc, char** argv){
int main(int argc, char** argv)
{
printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4);
if (argc < 2) {
//std::cout << "specify data file name and [device type] [device id]" << std::endl;
printf("specify data file name and [device type] [device id]\n");
return 0;
}
const char* data_file_name = argv[1];
@ -263,7 +272,6 @@ int main(int argc, char** argv){
_clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3));
_clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3));
_clFinish();
}
int nel;
int nelr;
@ -273,29 +281,26 @@ int main(int argc, char** argv){
//float* normals;
{
std::ifstream file(data_file_name);
if(!file.good()){
if ((file.rdstate() & std::ifstream::failbit) != 0) {
throw(string("can not find/open file!"));
}
file >> nel;
nelr = block_length * ((nel / block_length) + std::min(1, nel % block_length));
////std::cout<<"--cambine: nel="<<nel<<", nelr="<<nelr<<std::endl;
//std::cout << "--cambine: nel=" << nel << ", nelr=" << nelr << std::endl;
float* h_areas = new float[nelr];
int* h_elements_surrounding_elements = new int[nelr * NNB];
float* h_normals = new float[nelr * NDIM * NNB];
// read in data
for(int i = 0; i < nel; i++)
{
for (int i = 0; i < nel; i++) {
file >> h_areas[i];
for(int j = 0; j < NNB; j++)
{
for (int j = 0; j < NNB; j++) {
file >> h_elements_surrounding_elements[i + j * nelr];
if(h_elements_surrounding_elements[i+j*nelr] < 0) h_elements_surrounding_elements[i+j*nelr] = -1;
if (h_elements_surrounding_elements[i + j * nelr] < 0)
h_elements_surrounding_elements[i + j * nelr] = -1;
h_elements_surrounding_elements[i + j * nelr]--; //it's coming in with Fortran numbering
for(int k = 0; k < NDIM; k++)
{
for (int k = 0; k < NDIM; k++) {
file >> h_normals[i + (j + k * NNB) * nelr];
h_normals[i + (j + k * NNB) * nelr] = -h_normals[i + (j + k * NNB) * nelr];
}
@ -304,14 +309,13 @@ int main(int argc, char** argv){
// fill in remaining data
int last = nel - 1;
for(int i = nel; i < nelr; i++)
{
for (int i = nel; i < nelr; i++) {
h_areas[i] = h_areas[last];
for(int j = 0; j < NNB; j++)
{
for (int j = 0; j < NNB; j++) {
// duplicate the last element
h_elements_surrounding_elements[i + j * nelr] = h_elements_surrounding_elements[last + j * nelr];
for(int k = 0; k < NDIM; k++) h_normals[last + (j + k*NNB)*nelr] = h_normals[last + (j + k*NNB)*nelr];
for (int k = 0; k < NDIM; k++)
h_normals[last + (j + k * NNB) * nelr] = h_normals[last + (j + k * NNB) * nelr];
}
}
@ -343,15 +347,20 @@ int main(int argc, char** argv){
// make sure CUDA isn't still doing something before we start timing
_clFinish();
// these need to be computed the first time in order to compute time step
printf( "Starting...\n");
//std::cout << "Starting..." << std::endl;
printf("starting .. \n");
// Begin iterations
for (int i = 0; i < iterations; i++) {
copy<float>(old_variables, variables, nelr*NVAR);
//copy<float>(old_variables, variables, nelr * NVAR);
//download(ff_variable, old_variables, nelr * NVAR);
// upload(ff_variable, old_variables, nelr * NVAR);
_clMemcpyH2D(ff_variable, h_ff_variable, NVAR * sizeof(float));
_clMemcpyD2H(h_ff_variable, variables, NVAR * sizeof(float));
// for the first iteration we compute the time step
compute_step_factor(nelr, variables, areas, step_factors);
for (int j = 0; j < RK; j++) {
compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \
compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy,
ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z);
time_step(j, nelr, old_variables, variables, step_factors, fluxes);
@ -363,6 +372,7 @@ int main(int argc, char** argv){
//std::cout << "Saved solution..." << std::endl;
_clStatistics();
//std::cout << "Cleaning up..." << std::endl;
printf("Cleaning up ...\n");
//--release resources
_clFree(ff_variable);
@ -378,11 +388,11 @@ int main(int argc, char** argv){
_clFree(fluxes);
_clFree(step_factors);
_clRelease();
printf("Done...\n");
//std::cout << "Done..." << std::endl;
_clPrintTiming();
}
catch(string msg){
printf("--cambine:( an exception catched in main body ->%s\n", msg.c_str());
} catch (string msg) {
//std::cout << "--cambine:( an exception catched in main body ->" << msg << std::endl;
printf("--cambine:( an exception catched in main body\n");
_clFree(ff_variable);
_clFree(ff_flux_contribution_momentum_x);
_clFree(ff_flux_contribution_momentum_y);
@ -396,8 +406,8 @@ int main(int argc, char** argv){
_clFree(fluxes);
_clFree(step_factors);
_clRelease();
}
catch(...){
} catch (...) {
printf("--cambine:( unknow exceptions in main body...\n");
//std::cout << "--cambine:( unknow exceptions in main body..." << std::endl;
_clFree(ff_variable);
_clFree(ff_flux_contribution_momentum_x);