mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
261 lines
No EOL
8.3 KiB
C++
Executable file
261 lines
No EOL
8.3 KiB
C++
Executable file
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <CL/cl.h>
|
|
|
|
#include "utils.h"
|
|
|
|
// Rounds `value` up to the nearest multiple of `multiple`.
//
// value:    the number to round up
// multiple: the granularity to round to
//
// Returns `value` unchanged when it is already a multiple of `multiple`,
// or when `multiple` is 0 (there is nothing to round to, and the modulo
// below would otherwise divide by zero). The addition is plain unsigned
// arithmetic, so a result larger than UINT_MAX wraps around.
unsigned int roundUp(unsigned int value, unsigned int multiple) {

    // A multiple of zero is meaningless; bail out before the modulo
    if(multiple == 0) {
        return value;
    }

    // Determine how far past the nearest multiple the value is
    unsigned int remainder = value % multiple;

    // Add the difference to make the value a multiple
    if(remainder != 0) {
        value += (multiple-remainder);
    }

    return value;
}
|
|
|
|
// Reads the whole file at `kernelPath` into a newly allocated,
// NUL-terminated buffer.
//
// kernelPath: path of the file to load (opened in binary mode)
//
// Returns a heap buffer holding the file contents followed by '\0'. The
// caller owns the buffer and releases it with free(). On any failure (open,
// seek, tell, allocation, short read) a message is printed and the process
// exits with status -1, so this function never returns NULL.
char* readSource(char* kernelPath) {

    FILE *fp;
    char *source;
    long int size;
    size_t bytesRead;

    printf("Program file is: %s\n", kernelPath);

    fp = fopen(kernelPath, "rb");
    if(!fp) {
        printf("Could not open kernel file\n");
        exit(-1);
    }

    // Seek to the end so that ftell() reports the file length in bytes
    if(fseek(fp, 0, SEEK_END) != 0) {
        printf("Error seeking to end of file\n");
        exit(-1);
    }
    size = ftell(fp);
    if(size < 0) {
        printf("Error getting file position\n");
        exit(-1);
    }

    rewind(fp);

    // calloc hands back zero-filled memory, so the buffer is terminated
    // everywhere; the result is checked before anything is written to it
    source = (char *)calloc((size_t)size + 1, 1);
    if(source == NULL) {
        printf("Error allocating space for the kernel source\n");
        exit(-1);
    }

    // A short read would leave a silently truncated program, so treat it
    // as an error
    bytesRead = fread(source, 1, (size_t)size, fp);
    if(bytesRead != (size_t)size) {
        printf("Error reading the kernel source\n");
        exit(-1);
    }
    source[size] = '\0';

    fclose(fp);

    return source;
}
|
|
|
|
// Aborts the program when an OpenCL call did not succeed.
//
// status: the status code to test against CL_SUCCESS
// cmd:    text identifying the call, echoed in the failure message
//
// Returns normally only when `status` equals CL_SUCCESS; otherwise prints
// "<cmd> failed (<status>)" and exits with status -1.
void chk(cl_int status, const char* cmd) {

    // Nothing to report on success
    if(status == CL_SUCCESS) {
        return;
    }

    printf("%s failed (%d)\n", cmd, status);
    exit(-1);
}
|
|
|
|
int main() {
|
|
|
|
int i, j, k, l;
|
|
|
|
// Rows and columns in the input image
|
|
int imageHeight;
|
|
int imageWidth;
|
|
|
|
const char* inputFile = "input.bmp";
|
|
const char* outputFile = "output.bmp";
|
|
|
|
// Homegrown function to read a BMP from file
|
|
float* inputImage = readImage(inputFile, &imageWidth,
|
|
&imageHeight);
|
|
|
|
// Size of the input and output images on the host
|
|
int dataSize = imageHeight*imageWidth*sizeof(float);
|
|
|
|
// Output image on the host
|
|
float* outputImage = NULL;
|
|
outputImage = (float*)malloc(dataSize);
|
|
float* refImage = NULL;
|
|
refImage = (float*)malloc(dataSize);
|
|
|
|
// 45 degree motion blur
|
|
float filter[49] =
|
|
{0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, -1, 0, 1, 0, 0,
|
|
0, 0, -2, 0, 2, 0, 0,
|
|
0, 0, -1, 0, 1, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0};
|
|
|
|
// The convolution filter is 7x7
|
|
int filterWidth = 7;
|
|
int filterSize = filterWidth*filterWidth; // Assume a square kernel
|
|
|
|
// Set up the OpenCL environment
|
|
cl_int status;
|
|
|
|
// Discovery platform
|
|
cl_platform_id platform;
|
|
status = clGetPlatformIDs(1, &platform, NULL);
|
|
chk(status, "clGetPlatformIDs");
|
|
|
|
// Discover device
|
|
cl_device_id device;
|
|
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
|
|
chk(status, "clGetDeviceIDs");
|
|
|
|
// Create context
|
|
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM,
|
|
(cl_context_properties)(platform), 0};
|
|
cl_context context;
|
|
context = clCreateContext(props, 1, &device, NULL, NULL, &status);
|
|
chk(status, "clCreateContext");
|
|
|
|
// Create command queue
|
|
cl_command_queue queue;
|
|
queue = clCreateCommandQueue(context, device, 0, &status);
|
|
chk(status, "clCreateCommandQueue");
|
|
|
|
// The image format describes how the data will be stored in memory
|
|
cl_image_format format;
|
|
format.image_channel_order = CL_R; // single channel
|
|
format.image_channel_data_type = CL_FLOAT; // float data type
|
|
|
|
// Create space for the source image on the device
|
|
cl_mem d_inputImage = clCreateImage2D(context, 0, &format, imageWidth,
|
|
imageHeight, 0, NULL, &status);
|
|
chk(status, "clCreateImage2D");
|
|
|
|
// Create space for the output image on the device
|
|
cl_mem d_outputImage = clCreateImage2D(context, 0, &format, imageWidth,
|
|
imageHeight, 0, NULL, &status);
|
|
chk(status, "clCreateImage2D");
|
|
|
|
// Create space for the 7x7 filter on the device
|
|
cl_mem d_filter = clCreateBuffer(context, 0, filterSize*sizeof(float),
|
|
NULL, &status);
|
|
chk(status, "clCreateBuffer");
|
|
|
|
// Copy the source image to the device
|
|
size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from
|
|
size_t region[3] = {imageWidth, imageHeight, 1}; // Elements to per dimension
|
|
status = clEnqueueWriteImage(queue, d_inputImage, CL_FALSE, origin, region,
|
|
0, 0, inputImage, 0, NULL, NULL);
|
|
chk(status, "clEnqueueWriteImage");
|
|
|
|
// Copy the 7x7 filter to the device
|
|
status = clEnqueueWriteBuffer(queue, d_filter, CL_FALSE, 0,
|
|
filterSize*sizeof(float), filter, 0, NULL, NULL);
|
|
chk(status, "clEnqueueWriteBuffer");
|
|
|
|
// Create the image sampler
|
|
cl_sampler sampler = clCreateSampler(context, CL_FALSE,
|
|
CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &status);
|
|
chk(status, "clCreateSampler");
|
|
|
|
const char* source = readSource("kernel.cl");
|
|
|
|
// Create a program object with source and build it
|
|
cl_program program;
|
|
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
|
|
chk(status, "clCreateProgramWithSource");
|
|
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
|
|
chk(status, "clBuildProgram");
|
|
|
|
// Create the kernel object
|
|
cl_kernel kernel;
|
|
kernel = clCreateKernel(program, "convolution", &status);
|
|
chk(status, "clCreateKernel");
|
|
|
|
// Set the kernel arguments
|
|
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
|
|
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
|
|
status |= clSetKernelArg(kernel, 2, sizeof(int), &imageHeight);
|
|
status |= clSetKernelArg(kernel, 3, sizeof(int), &imageWidth);
|
|
status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter);
|
|
status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth);
|
|
status |= clSetKernelArg(kernel, 6, sizeof(cl_sampler), &sampler);
|
|
chk(status, "clSetKernelArg");
|
|
|
|
// Set the work item dimensions
|
|
size_t globalSize[2] = {imageWidth, imageHeight};
|
|
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0,
|
|
NULL, NULL);
|
|
chk(status, "clEnqueueNDRange");
|
|
|
|
// Read the image back to the host
|
|
status = clEnqueueReadImage(queue, d_outputImage, CL_TRUE, origin,
|
|
region, 0, 0, outputImage, 0, NULL, NULL);
|
|
chk(status, "clEnqueueReadImage");
|
|
|
|
// Write the output image to file
|
|
storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile);
|
|
|
|
// Compute the reference image
|
|
for(i = 0; i < imageHeight; i++) {
|
|
for(j = 0; j < imageWidth; j++) {
|
|
refImage[i*imageWidth+j] = 0;
|
|
}
|
|
}
|
|
|
|
// Iterate over the rows of the source image
|
|
int halfFilterWidth = filterWidth/2;
|
|
float sum;
|
|
for(i = 0; i < imageHeight; i++) {
|
|
// Iterate over the columns of the source image
|
|
for(j = 0; j < imageWidth; j++) {
|
|
sum = 0; // Reset sum for new source pixel
|
|
// Apply the filter to the neighborhood
|
|
for(k = - halfFilterWidth; k <= halfFilterWidth; k++) {
|
|
for(l = - halfFilterWidth; l <= halfFilterWidth; l++) {
|
|
if(i+k >= 0 && i+k < imageHeight &&
|
|
j+l >= 0 && j+l < imageWidth) {
|
|
sum += inputImage[(i+k)*imageWidth + j+l] *
|
|
filter[(k+halfFilterWidth)*filterWidth +
|
|
l+halfFilterWidth];
|
|
}
|
|
}
|
|
}
|
|
refImage[i*imageWidth+j] = sum;
|
|
}
|
|
}
|
|
|
|
int failed = 0;
|
|
for(i = 0; i < imageHeight; i++) {
|
|
for(j = 0; j < imageWidth; j++) {
|
|
if(abs(outputImage[i*imageWidth+j]-refImage[i*imageWidth+j]) > 0.01) {
|
|
printf("Results are INCORRECT\n");
|
|
printf("Pixel mismatch at <%d,%d> (%f vs. %f)\n", i, j,
|
|
outputImage[i*imageWidth+j], refImage[i*imageWidth+j]);
|
|
failed = 1;
|
|
}
|
|
if(failed) break;
|
|
}
|
|
if(failed) break;
|
|
}
|
|
if(!failed) {
|
|
printf("Results are correct\n");
|
|
}
|
|
|
|
return 0;
|
|
} |