This commit is contained in:
Blaise Tine 2019-11-25 04:49:55 -05:00
parent 54ece84f79
commit f2dd612078
34 changed files with 5627 additions and 37 deletions

View file

@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=bfs
PROJECT = bfs
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -0,0 +1,68 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT = gaussian
SRCS = main.cc clutils.cpp utils.cpp
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -0,0 +1,241 @@
/*-----------------------------------------------------------
** ge_p.c -- The program is to solve a linear system Ax = b
** by using Gaussian Elimination. The algorithm on page 101
** ("Foundations of Parallel Programming") is used.
** The sequential version is ge_s.c. This parallel
** implementation converts three independent for() loops
** into three Fans. Use the data file ge_3.dat to verify
** the correction of the output.
**
** Written by Andreas Kura, 02/15/95
** Modified by Chong-wei Xu, /04/20/95
**-----------------------------------------------------------
*/
#include <us.h>
#include <stdio.h>
int Size, t;
float **a, *b;
BEGIN_SHARED_DECL
float **m;
END_SHARED_DECL;
FILE *fp;
void InitProblemOnce();
void InitPerRun();
void ForwardSub();
void Fan1();
void Fan2();
void Fan3();
void InitMat();
void InitAry();
void PrintMat();
void PrintAry();
main ()
{
InitializeUs();
MakeSharedVariables; /* to make SHARED m */
InitProblemOnce();
InitPerRun();
ForwardSub();
printf("The result of matrix m is: \n");
PrintMat(SHARED m, Size, Size);
printf("The result of matrix a is: \n");
PrintMat(a, Size, Size);
printf("The result of array b is: \n");
PrintAry(b, Size);
}
/*------------------------------------------------------
** InitProblemOnce -- Initialize all of matrices and
** vectors by opening a data file specified by the user.
**
** We used dynamic array **a, *b, and **m to allocate
** the memory storages.
**------------------------------------------------------
*/
void InitProblemOnce()
{
char filename[30];
printf("Enter the data file name: ");
scanf("%s", filename);
printf("The file name is: %s\n", filename);
fp = fopen(filename, "r");
fscanf(fp, "%d", &Size);
a = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
/*
a = (float **) malloc(Size * sizeof(float *));
for (i=0; i<Size; i++) {
a[i] = (float *) malloc(Size * sizeof(float));
}
*/
InitMat(a, Size, Size);
printf("The input matrix a is:\n");
PrintMat(a, Size, Size);
b = (float *) UsAlloc(Size * sizeof(float));
/*
b = (float *) malloc(Size * sizeof(float));
*/
InitAry(b, Size);
printf("The input array b is:\n");
PrintAry(b, Size);
SHARED m = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float));
/*
m = (float **) malloc(Size * sizeof(float *));
for (i=0; i<Size; i++) {
m[i] = (float *) malloc(Size * sizeof(float));
}
*/
Share(&Size);
Share(&a);
Share(&b);
}
/*------------------------------------------------------
** InitPerRun() -- Initialize the contents of the
** multipier matrix **m
**------------------------------------------------------
*/
void InitPerRun()
{
int i, j;
for (i=0; i<Size; i++)
for (j=0; j<Size; j++)
SHARED m[i][j] = 0.0;
}
/*------------------------------------------------------
** ForwardSub() -- Forward substitution of Gaussian
** elimination.
**------------------------------------------------------
*/
void ForwardSub()
{
for (t=0; t<(Size-1); t++) {
Share(&t);
GenOnI(Fan1, Size-1-t); /* t=0 to (Size-2), the range is
** Size-2-t+1 = Size-1-t
*/
GenOnA(Fan2, Size-1-t, Size-t);
GenOnI(Fan3, Size-1-t);
}
}
/*-------------------------------------------------------
** Fan1() -- Calculate multiplier matrix
** Pay attention to the index. Index i give the range
** which starts from 0 to range-1. The real values of
** the index should be adjust and related with the value
** of t which is defined on the ForwardSub().
**-------------------------------------------------------
*/
void Fan1(dummy, i)
int dummy, i;
{
/* Use these printf() to display the nodes and index */
printf("from node #%d\n", PhysProcToUsProc(Proc_Node));
SHARED m[i+t+1][t] = a[i+t+1][t] / a[t][t];
printf("i=%d, a[%d][%d]=%.2f, a[%d][%d]=%.2f, m[%d][%d]=%.2f\n",
(i+t+1),t,t,a[t][t],(i+t+1),t,a[i+t+1][t],(i+t+1),t,
SHARED m[i+t+1][t]);
}
/*-------------------------------------------------------
** Fan2() -- Modify the matrix A into LUD
**-------------------------------------------------------
*/
void Fan2(dummy, i, j)
int dummy, i, j;
{
a[i+1+t][j+t] -= SHARED m[i+1+t][t] * a[t][j+t];
Share (&a);
}
/*-------------------------------------------------------
** Fan3() -- Modify the array b
**-------------------------------------------------------
*/
void Fan3(dummy, i)
int dummy, i;
{
b[i+1+t] -= SHARED m[i+1+t][t] * b[t];
}
/*------------------------------------------------------
** InitMat() -- Initialize the matrix by reading data
** from the data file
**------------------------------------------------------
*/
void InitMat(ary, nrow, ncol)
float **ary;
int nrow, ncol;
{
int i, j;
for (i=0; i<nrow; i++) {
for (j=0; j<ncol; j++) {
fscanf(fp, "%f", &ary[i][j]);
}
}
}
/*------------------------------------------------------
** PrintMat() -- Print the contents of the matrix
**------------------------------------------------------
*/
void PrintMat(ary, nrow, ncol)
float **ary;
int nrow, ncol;
{
int i, j;
for (i=0; i<nrow; i++) {
for (j=0; j<ncol; j++) {
printf("%8.2f ", ary[i][j]);
}
printf("\n");
}
printf("\n");
}
/*------------------------------------------------------
** InitAry() -- Initialize the array (vector) by reading
** data from the data file
**------------------------------------------------------
*/
void InitAry(ary, ary_size)
float *ary;
int ary_size;
{
int i;
for (i=0; i<ary_size; i++) {
fscanf(fp, "%f", &ary[i]);
}
}
/*------------------------------------------------------
** PrintAry() -- Print the contents of the array (vector)
**------------------------------------------------------
*/
void PrintAry(ary, ary_size)
float *ary;
int ary_size;
{
int i;
for (i=0; i<ary_size; i++) {
printf("%.2f ", ary[i]);
}
printf("\n");
}

View file

@ -0,0 +1,60 @@
The Gaussian Elimination application solves systems of equations using the
gaussian elimination method.
The application analyzes an n x n matrix and an associated 1 x n vector to solve a
set of equations with n variables and n unknowns. The matrix and vector describe equations
of the form:
a0x + b0y + c0z + d0w = e0
a1x + b1y + c1z + d1w = e1
a2x + b2y + c2z + d2w = e2
a3x + b3y + c3z + d3w = e3
where in this case n=4. The matrix for the above equations would be as follows:
[a0 b0 c0 d0]
[a1 b1 c1 d1]
[a2 b2 c2 d2]
[a3 b3 c3 d3]
and the vector would be:
[e0]
[e1]
[e2]
[e3]
The application creates a solution vector:
[x]
[y]
[z]
[w]
The Makefile may need to be adjusted for different machines, but it was written for Mac OS X and
Linux with either NVIDIA or AMD OpenCL SDKs.
Additional input files can be created with the matrixGenerator.py file in the data folder.
Gaussian Elimination Usage
gaussianElimination [filename] [-hqt] [-p [int] -d [int]]
example:
$ ./gaussianElimination matrix4.txt
filename the filename that holds the matrix data
-h, --help Display the help file
-q Quiet mode. Suppress all text output.
-t Print timing information.
-p [int] Choose the platform (must choose both platform and device)
-d [int] Choose the device (must choose both platform and device)
Notes: 1. The filename is required as the first parameter.
2. If you declare either the device or the platform,
you must declare both.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,281 @@
/****************************************************************************\
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions *
* are met: *
* *
* Redistributions of source code must retain the above copyright notice, *
* this list of conditions and the following disclaimer. *
* *
* Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* Neither the name of the copyright holder nor the names of its contributors *
* may be used to endorse or promote products derived from this software *
* without specific prior written permission. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* If you use the software (in whole or in part), you shall adhere to all *
* applicable U.S., European, and other export laws, including but not *
* limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. *
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
* hereby certify that, except pursuant to a license granted by the United *
* States Department of Commerce Bureau of Industry and Security or as *
* otherwise permitted pursuant to a License Exception under the U.S. Export *
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
* restricted technology, software, or source code you receive hereunder, *
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
* technology or software, if such foreign produced direct product is subject *
* to national security controls as identified on the Commerce Control List *
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
* Country Group listings, or for additional information about the EAR or *
* your obligations under those regulations, please refer to the U.S. Bureau *
* of Industry and Security’s website at http://www.bis.doc.gov/. *
\****************************************************************************/
#ifndef __CL_UTILS_H__
#define __CL_UTILS_H__
#include <CL/cl.h>
// The cl_time type is OS specific
#ifdef _WIN32
#include <tchar.h>
#include <Windows.h>
typedef __int64 cl_time;
#else
#include <sys/time.h>
typedef double cl_time;
#endif
//-------------------------------------------------------
// Initialization and Cleanup
//-------------------------------------------------------
// Detects platforms and devices, creates context and command queue
cl_context cl_init(char devicePreference='\0');
// Creates a context given a platform and a device
cl_context cl_init_context(int platform,int dev,int quiet=0);
// Releases resources used by clutils
void cl_cleanup();
// Releases a kernel object
void cl_freeKernel(cl_kernel kernel);
// Releases a memory object
void cl_freeMem(cl_mem mem);
// Releases a program object
void cl_freeProgram(cl_program program);
// Returns the global command queue
cl_command_queue cl_getCommandQueue();
//-------------------------------------------------------
// Synchronization functions
//-------------------------------------------------------
// Performs a clFinish on the command queue
void cl_sync();
//-------------------------------------------------------
// Memory allocation
//-------------------------------------------------------
// Allocates a regular buffer on the device
cl_mem cl_allocBuffer(size_t mem_size,
cl_mem_flags flags = CL_MEM_READ_WRITE);
// XXX I don't think this does exactly what we want it to do
// Allocates a read-only buffer and transfers the data
cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr);
// Allocates pinned memory on the host
cl_mem cl_allocBufferPinned(size_t mem_size);
// Allocates an image on the device
cl_mem cl_allocImage(size_t height, size_t width, char type,
cl_mem_flags flags = CL_MEM_READ_WRITE);
//-------------------------------------------------------
// Data transfers
//-------------------------------------------------------
// Copies a buffer from the device to pinned memory on the host and
// maps it so it can be read
void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size);
// Copies from one buffer to another
void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size);
// Copies data to a buffer on the device
void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size,
cl_bool blocking = CL_TRUE);
// Copies data to an image on the device
void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width);
// Copies an image from the device to the host
void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width);
// Copies data from a device buffer to the host
void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size,
cl_bool blocking = CL_TRUE);
// Copies data from a buffer on the device to an image on the device
void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width);
// Maps a buffer
void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags);
// Unmaps a buffer
void cl_unmapBuffer(cl_mem mem, void *ptr);
// Writes data to a zero-copy buffer on the device
void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size);
//-------------------------------------------------------
// Program and kernels
//-------------------------------------------------------
// Compiles a program
cl_program cl_compileProgram(char* kernelPath, char* compileoptions,
bool verboseoptions = 0);
// Creates a kernel
cl_kernel cl_createKernel(cl_program program, const char* kernelName);
// Sets a kernel argument
void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size,
void* data);
//-------------------------------------------------------
// Profiling/events
//-------------------------------------------------------
// Computes the execution time (start to end) for an event
double cl_computeExecTime(cl_event);
// Compute the elapsed time between two CPU timer values
double cl_computeTime(cl_time start, cl_time end);
// Creates an event from CPU timers
void cl_createUserEvent(cl_time start, cl_time end, char* desc);
// Disable logging of events
void cl_disableEvents();
// Enable logging of events
void cl_enableEvents();
// Query the current system time
void cl_getTime(cl_time* time);
// Calls a function which prints events to the terminal
void cl_printEvents();
// Calls a function which writes the events to a file
void cl_writeEventsToFile(char* path);
//-------------------------------------------------------
// Error handling
//-------------------------------------------------------
// Compare a status value to CL_SUCCESS and optionally exit on error
int cl_errChk(const cl_int status, const char *msg, bool exitOnErr);
// Queries the supported image formats for the device and prints
// them to the screen
void printSupportedImageFormats();
//-------------------------------------------------------
// Platform and device information
//-------------------------------------------------------
bool cl_deviceIsAMD(cl_device_id dev=NULL);
bool cl_deviceIsNVIDIA(cl_device_id dev=NULL);
bool cl_platformIsNVIDIA(cl_platform_id plat=NULL);
char* cl_getDeviceDriverVersion(cl_device_id dev=NULL);
char* cl_getDeviceName(cl_device_id dev=NULL);
char* cl_getDeviceVendor(cl_device_id dev=NULL);
char* cl_getDeviceVersion(cl_device_id dev=NULL);
char* cl_getPlatformName(cl_platform_id platform);
char* cl_getPlatformVendor(cl_platform_id platform);
//-------------------------------------------------------
// Utility functions
//-------------------------------------------------------
char* catStringWithInt(const char* str, int integer);
char* itoa_portable(int value, char* result, int base);
//-------------------------------------------------------
// Data types
//-------------------------------------------------------
typedef struct{
int x;
int y;
} int2;
typedef struct{
float x;
float y;
}float2;
typedef struct{
float x;
float y;
float z;
float w;
}float4;
//-------------------------------------------------------
// Defines
//-------------------------------------------------------
#define MAX_ERR_VAL 64
#define NUM_PROGRAMS 7
#define NUM_KERNELS 13
#define KERNEL_INIT_DET 0
#define KERNEL_BUILD_DET 1
#define KERNEL_SURF_DESC 2
#define KERNEL_NORM_DESC 3
#define KERNEL_NON_MAX_SUP 4
#define KERNEL_GET_ORIENT1 5
#define KERNEL_GET_ORIENT2 6
#define KERNEL_NN 7
#define KERNEL_SCAN 8
#define KERNEL_SCAN4 9
#define KERNEL_TRANSPOSE 10
#define KERNEL_SCANIMAGE 11
#define KERNEL_TRANSPOSEIMAGE 12
#endif

View file

@ -0,0 +1,40 @@
#ifndef _GAUSSIANELIM
#define _GAUSSIANELIM
#include <iostream>
#include <vector>
#include <float.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include "clutils.h"
// All OpenCL headers
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
float *OpenClGaussianElimination(
cl_context context,
int timing);
void printUsage();
int parseCommandline(int argc, char *argv[], char* filename,
int *q, int *t, int *p, int *d);
void InitPerRun(int size,float *m);
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,int timing);
void BackSub(float *a, float *b, float *finalVec, int size);
void Fan1(float *m, float *a, int Size, int t);
void Fan2(float *m, float *a, float *b,int Size, int j1, int t);
//void Fan3(float *m, float *b, int Size, int t);
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol);
void InitAry(FILE *fp, float *ary, int ary_size);
void PrintMat(float *ary, int size, int nrow, int ncolumn);
void PrintAry(float *ary, int ary_size);
float eventTime(cl_event event,cl_command_queue command_queue);
#endif

View file

@ -0,0 +1,74 @@
#include "stdio.h"
#include <time.h>
#include <windows.h>
#include <iostream>
//using namespace System;
using namespace std;
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
#else
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
struct timezone
{
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
// Definition of a gettimeofday function
int gettimeofday(struct timeval *tv, struct timezone *tz)
{
// Define a structure to receive the current Windows filetime
FILETIME ft;
// Initialize the present time to 0 and the timezone to UTC
unsigned __int64 tmpres = 0;
static int tzflag = 0;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
// The GetSystemTimeAsFileTime returns the number of 100 nanosecond
// intervals since Jan 1, 1601 in a structure. Copy the high bits to
// the 64 bit tmpres, shift it left by 32 then or in the low 32 bits.
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
// Convert to microseconds by dividing by 10
tmpres /= 10;
// The Unix epoch starts on Jan 1 1970. Need to subtract the difference
// in seconds from Jan 1 1601.
tmpres -= DELTA_EPOCH_IN_MICROSECS;
// Finally change microseconds to seconds and place in the seconds value.
// The modulus picks up the microseconds.
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
if (NULL != tz)
{
if (!tzflag)
{
_tzset();
tzflag++;
}
// Adjust for the timezone west of Greenwich
long seconds_diff;
_get_timezone(&seconds_diff);
tz->tz_minuteswest = seconds_diff / 60;
int hours_offset;
_get_daylight(&hours_offset);
tz->tz_dsttime = hours_offset;
}
return 0;
}

View file

@ -0,0 +1,17 @@
#ifdef _WIN32
#include <WinSock.h>
/**
Based on code seen at.
http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html
http://msdn.microsoft.com/en-us/library/ms740560
*/
int gettimeofday(struct timeval *tv, struct timezone *tz);
#else
#include <sys/time.h>
#endif

View file

@ -0,0 +1,49 @@
//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
typedef struct latLong
{
float lat;
float lng;
} LatLong;
__kernel void Fan1(__global float *m_dev,
__global float *a_dev,
__global float *b_dev,
const int size,
const int t) {
int globalId = get_global_id(0);
if (globalId < size-1-t) {
*(m_dev + size * (globalId + t + 1)+t) = *(a_dev + size * (globalId + t + 1) + t) / *(a_dev + size * t + t);
}
}
__kernel void Fan2(__global float *m_dev,
__global float *a_dev,
__global float *b_dev,
const int size,
const int t) {
int globalId = get_global_id(0);
int globalIdx = get_global_id(0);
int globalIdy = get_global_id(1);
if (globalIdx < size-1-t && globalIdy < size-t) {
a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)];
if(globalIdy == 0){
b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t];
}
}
// One dimensional
// int globalIdx = globalId % size;
// int globalIdy = globalId / size;
//
// if (globalIdx < size-1-t && globalIdy < size-t) {
// a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)];
// }
// if(globalIdy == 0){
// b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t];
// }
}

Binary file not shown.

View file

@ -0,0 +1,412 @@
#ifndef __GAUSSIAN_ELIMINATION__
#define __GAUSSIAN_ELIMINATION__
#include "gaussianElim.h"
cl_context context = NULL;
int main(int argc, char *argv[]) {
printf("enter demo main\n");
float *a = NULL, *b = NULL, *finalVec = NULL;
float *m = NULL;
int size;
FILE *fp;
// args
char filename[100];
int quiet = 0, timing = 0, platform = -1, device = -1;
// parse command line
if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform,
&device)) {
printUsage();
return 0;
}
context = cl_init_context(platform, device, quiet);
fp = fopen(filename, "r");
fscanf(fp, "%d", &size);
a = (float *)malloc(size * size * sizeof(float));
printf("OK\n");
InitMat(fp, size, a, size, size);
// printf("The input matrix a is:\n");
// PrintMat(a, size, size, size);
b = (float *)malloc(size * sizeof(float));
InitAry(fp, b, size);
// printf("The input array b is:\n");
// PrintAry(b, size);
// create the solution matrix
m = (float *)malloc(size * size * sizeof(float));
// create a new vector to hold the final answer
finalVec = (float *)malloc(size * sizeof(float));
InitPerRun(size, m);
// begin timing
// run kernels
ForwardSub(context, a, b, m, size, timing);
// end timing
if (!quiet) {
printf("The result of matrix m is: \n");
PrintMat(m, size, size, size);
printf("The result of matrix a is: \n");
PrintMat(a, size, size, size);
printf("The result of array b is: \n");
PrintAry(b, size);
BackSub(a, b, finalVec, size);
printf("The final solution is: \n");
PrintAry(finalVec, size);
}
fclose(fp);
free(m);
free(a);
free(b);
free(finalVec);
// OpenClGaussianElimination(context,timing);
return 0;
}
/*------------------------------------------------------
** ForwardSub() -- Forward substitution of Gaussian
** elimination.
**------------------------------------------------------
*/
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
int timing) {
// 1. set up kernels
cl_kernel fan1_kernel, fan2_kernel;
cl_int status = 0;
cl_program gaussianElim_program;
cl_event writeEvent, kernelEvent, readEvent;
float writeTime = 0, readTime = 0, kernelTime = 0;
float writeMB = 0, readMB = 0;
gaussianElim_program =
cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL);
fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status);
status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true);
if (status)
exit(1);
fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status);
status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true);
if (status)
exit(1);
// 2. set up memory on device and send ipts data to device
cl_mem a_dev, b_dev, m_dev;
cl_int error = 0;
a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(float) * size * size, NULL, &error);
b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL,
&error);
m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(float) * size * size, NULL, &error);
cl_command_queue command_queue = cl_getCommandQueue();
error = clEnqueueWriteBuffer(command_queue, a_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size * size, a, 0, NULL,
&writeEvent);
if (timing)
writeTime += eventTime(writeEvent, command_queue);
clReleaseEvent(writeEvent);
error = clEnqueueWriteBuffer(command_queue, b_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size, b, 0, NULL, &writeEvent);
if (timing)
writeTime += eventTime(writeEvent, command_queue);
clReleaseEvent(writeEvent);
error = clEnqueueWriteBuffer(command_queue, m_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size * size, m, 0, NULL,
&writeEvent);
if (timing)
writeTime += eventTime(writeEvent, command_queue);
clReleaseEvent(writeEvent);
writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
// 3. Determine block sizes
size_t globalWorksizeFan1[1];
size_t globalWorksizeFan2[2];
globalWorksizeFan1[0] = size;
globalWorksizeFan2[0] = size;
globalWorksizeFan2[1] = size;
int t;
// 4. Setup and Run kernels
for (t = 0; t < (size - 1); t++) {
// kernel args
cl_int argchk;
argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size);
argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t);
cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true);
// launch kernel
error =
clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0,
globalWorksizeFan1, NULL, 0, NULL, &kernelEvent);
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
if (timing) {
// printf("here1a\n");
kernelTime += eventTime(kernelEvent, command_queue);
// printf("here1b\n");
}
clReleaseEvent(kernelEvent);
// Fan1<<<dimGrid,dimBlock>>>(m_cuda,a_cuda,Size,t);
// cudaThreadSynchronize();
// kernel args
argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size);
argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t);
cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true);
// launch kernel
error =
clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0,
globalWorksizeFan2, NULL, 0, NULL, &kernelEvent);
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
if (timing) {
// printf("here2a\n");
kernelTime += eventTime(kernelEvent, command_queue);
// printf("here2b\n");
}
clReleaseEvent(kernelEvent);
// Fan2<<<dimGridXY,dimBlockXY>>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t);
// cudaThreadSynchronize();
}
// 5. transfer data off of device
error =
clEnqueueReadBuffer(command_queue, a_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size * size, a, 0, NULL, &readEvent);
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
if (timing)
readTime += eventTime(readEvent, command_queue);
clReleaseEvent(readEvent);
error = clEnqueueReadBuffer(command_queue, b_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size, b, 0, NULL, &readEvent);
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
if (timing)
readTime += eventTime(readEvent, command_queue);
clReleaseEvent(readEvent);
error =
clEnqueueReadBuffer(command_queue, m_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size * size, m, 0, NULL, &readEvent);
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
if (timing)
readTime += eventTime(readEvent, command_queue);
clReleaseEvent(readEvent);
readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
if (timing) {
printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
"[size]\t\tTotal(s)\n");
printf("%dx%d \t", size, size);
printf("%f [%.2fMB]\t", writeTime, writeMB);
printf("%f\t", kernelTime);
printf("%f [%.2fMB]\t", readTime, readMB);
printf("%f\n\n", writeTime + kernelTime + readTime);
}
}
float eventTime(cl_event event, cl_command_queue command_queue) {
cl_int error = 0;
cl_ulong eventStart, eventEnd;
clFinish(command_queue);
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &eventStart, NULL);
cl_errChk(error, "ERROR in Event Profiling.", true);
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &eventEnd, NULL);
cl_errChk(error, "ERROR in Event Profiling.", true);
return (float)((eventEnd - eventStart) / 1e9);
}
int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t,
int *p, int *d) {
int i;
// if (argc < 2) return 1; // error
strncpy(filename, "matrix4.txt", 100);
char flag;
for (i = 1; i < argc; i++) {
if (argv[i][0] == '-') { // flag
flag = argv[i][1];
switch (flag) {
case 'h': // help
return 1;
break;
case 'q': // quiet
*q = 1;
break;
case 't': // timing
*t = 1;
break;
case 'p': // platform
i++;
*p = atoi(argv[i]);
break;
case 'd': // device
i++;
*d = atoi(argv[i]);
break;
}
}
}
if ((*d >= 0 && *p < 0) ||
(*p >= 0 &&
*d < 0)) // both p and d must be specified if either are specified
return 1;
return 0;
}
void printUsage() {
printf("Gaussian Elimination Usage\n");
printf("\n");
printf("gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n");
printf("\n");
printf("example:\n");
printf("$ ./gaussianElimination matrix4.txt\n");
printf("\n");
printf("filename the filename that holds the matrix data\n");
printf("\n");
printf("-h Display the help file\n");
printf("-q Quiet mode. Suppress all text output.\n");
printf("-t Print timing information.\n");
printf("\n");
printf("-p [int] Choose the platform (must choose both platform and "
"device)\n");
printf("-d [int] Choose the device (must choose both platform and "
"device)\n");
printf("\n");
printf("\n");
printf("Notes: 1. The filename is required as the first parameter.\n");
printf(" 2. If you declare either the device or the platform,\n");
printf(" you must declare both.\n\n");
}
/*------------------------------------------------------
** InitPerRun() -- Initialize the contents of the
** multipier matrix **m
**------------------------------------------------------
*/
void InitPerRun(int size, float *m) {
int i;
for (i = 0; i < size * size; i++)
*(m + i) = 0.0;
}
void BackSub(float *a, float *b, float *finalVec, int size) {
// solve "bottom up"
int i, j;
for (i = 0; i < size; i++) {
finalVec[size - i - 1] = b[size - i - 1];
for (j = 0; j < i; j++) {
finalVec[size - i - 1] -= *(a + size * (size - i - 1) + (size - j - 1)) *
finalVec[size - j - 1];
}
finalVec[size - i - 1] =
finalVec[size - i - 1] / *(a + size * (size - i - 1) + (size - i - 1));
}
}
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) {
int i, j;
for (i = 0; i < nrow; i++) {
for (j = 0; j < ncol; j++) {
fscanf(fp, "%f", ary + size * i + j);
}
}
}
/*------------------------------------------------------
** InitAry() -- Initialize the array (vector) by reading
** data from the data file
**------------------------------------------------------
*/
void InitAry(FILE *fp, float *ary, int ary_size) {
int i;
for (i = 0; i < ary_size; i++) {
fscanf(fp, "%f", &ary[i]);
}
}
/*------------------------------------------------------
** PrintMat() -- Print the contents of the matrix
**------------------------------------------------------
*/
void PrintMat(float *ary, int size, int nrow, int ncol) {
int i, j;
for (i = 0; i < nrow; i++) {
for (j = 0; j < ncol; j++) {
printf("%8.2f ", *(ary + size * i + j));
}
printf("\n");
}
printf("\n");
}
/*------------------------------------------------------
** PrintAry() -- Print the contents of the array (vector)
**------------------------------------------------------
*/
void PrintAry(float *ary, int ary_size) {
int i;
for (i = 0; i < ary_size; i++) {
printf("%.2f ", ary[i]);
}
printf("\n\n");
}
#endif

View file

@ -0,0 +1,11 @@
4
-0.6 -0.5 0.7 0.3
-0.3 -0.9 0.3 0.7
-0.4 -0.5 -0.3 -0.8
0.0 -0.1 0.2 0.9
-0.85 -0.68 0.24 -0.53
0.7 0.0 -0.4 -0.5

1
benchmarks/opencl/guassian/run Executable file
View file

@ -0,0 +1 @@
./gaussian ../../data/gaussian/matrix4.txt

View file

@ -0,0 +1,204 @@
/****************************************************************************\
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions *
* are met: *
* *
* Redistributions of source code must retain the above copyright notice, *
* this list of conditions and the following disclaimer. *
* *
* Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* Neither the name of the copyright holder nor the names of its contributors *
* may be used to endorse or promote products derived from this software *
* without specific prior written permission. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* If you use the software (in whole or in part), you shall adhere to all *
* applicable U.S., European, and other export laws, including but not *
* limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. *
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
* hereby certify that, except pursuant to a license granted by the United *
* States Department of Commerce Bureau of Industry and Security or as *
* otherwise permitted pursuant to a License Exception under the U.S. Export *
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
* restricted technology, software, or source code you receive hereunder, *
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
* technology or software, if such foreign produced direct product is subject *
* to national security controls as identified on the Commerce Control List *
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
* Country Group listings, or for additional information about the EAR or *
* your obligations under those regulations, please refer to the U.S. Bureau *
* of Industry and Security’s website at http://www.bis.doc.gov/. *
\****************************************************************************/
#include <stdio.h>
#include <sys/stat.h>
#include <string.h>
#include <stdlib.h>
#include "utils.h"
static bool usingImages = true;
//! A wrapper for malloc that checks the return value
void* alloc(size_t size) {
void* ptr = NULL;
ptr = malloc(size);
if(ptr == NULL) {
perror("malloc");
exit(-1);
}
return ptr;
}
// This function checks to make sure a file exists before we open it
void checkFile(char* filename)
{
struct stat fileStatus;
if(stat(filename, &fileStatus) != 0) {
printf("Error opening file: %s\n", filename);
exit(-1);
}
else {
if(!(S_IFREG & fileStatus.st_mode)) {
printf("File %s is not a regular file\n", filename);
exit(-1);
}
}
}
// This function checks to make sure a directory exists
void checkDir(char* dirpath)
{
struct stat fileStatus;
if(stat(dirpath, &fileStatus) != 0) {
printf("Directory does not exist: %s\n", dirpath);
exit(-1);
}
else {
if(!(S_IFDIR & fileStatus.st_mode)) {
printf("Directory was not provided: %s\n", dirpath);
exit(-1);
}
}
}
// Parse the command line arguments
void parseArguments(int argc, char** argv, char** input, char** events,
char** ipts, char* devicePref, bool* verifyResults)
{
for(int i = 2; i < argc; i++) {
if(strcmp(argv[i], "-d") == 0) { // Event dump found
if(i == argc-1) {
printf("Usage: -e Needs directory path\n");
exit(-1);
}
devicePref[0] = argv[i+1][0];
i++;
continue;
}
if(strcmp(argv[i], "-e") == 0) { // Event dump found
if(i == argc-1) {
printf("Usage: -e Needs directory path\n");
exit(-1);
}
*events = argv[i+1];
i++;
continue;
}
if(strcmp(argv[i], "-i") == 0) { // Input found
if(i == argc-1) {
printf("Usage: -i Needs directory path\n");
exit(-1);
}
*input = argv[i+1];
i++;
continue;
}
if(strcmp(argv[i], "-l") == 0) { // Ipts dump found
if(i == argc-1) {
printf("Usage: -l Needs directory path\n");
exit(-1);
}
*ipts = argv[i+1];
i++;
continue;
}
if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images
setUsingImages(false);
continue;
}
if(strcmp(argv[i], "-v") == 0) { // Verify results
*verifyResults = true;
continue;
}
}
}
// This function that takes a positive integer 'value' and returns
// the nearest multiple of 'multiple' (used for padding columns)
unsigned int roundUp(unsigned int value, unsigned int multiple) {
unsigned int remainder = value % multiple;
// Make the value a multiple of multiple
if(remainder != 0) {
value += (multiple-remainder);
}
return value;
}
// Concatenate two strings and return a pointer to the new string
char* smartStrcat(char* str1, char* str2)
{
char* newStr = NULL;
newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char));
strcpy(newStr, str1);
strcat(newStr, str2);
return newStr;
}
// Set the value of using images to true if they are being
// used, or false if they are not
void setUsingImages(bool val)
{
usingImages = val;
}
// Return whether or not images are being used
bool isUsingImages()
{
return usingImages;
}

View file

@ -0,0 +1,84 @@
/****************************************************************************\
* Copyright (c) 2011, Advanced Micro Devices, Inc. *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions *
* are met: *
* *
* Redistributions of source code must retain the above copyright notice, *
* this list of conditions and the following disclaimer. *
* *
* Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* Neither the name of the copyright holder nor the names of its contributors *
* may be used to endorse or promote products derived from this software *
* without specific prior written permission. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* If you use the software (in whole or in part), you shall adhere to all *
* applicable U.S., European, and other export laws, including but not *
* limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. *
* Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
* of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
* hereby certify that, except pursuant to a license granted by the United *
* States Department of Commerce Bureau of Industry and Security or as *
* otherwise permitted pursuant to a License Exception under the U.S. Export *
* Administration Regulations ("EAR"), you will not (1) export, re-export or *
* release to a national of a country in Country Groups D:1, E:1 or E:2 any *
* restricted technology, software, or source code you receive hereunder, *
* or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
* technology or software, if such foreign produced direct product is subject *
* to national security controls as identified on the Commerce Control List *
*(currently found in Supplement 1 to Part 774 of EAR). For the most current *
* Country Group listings, or for additional information about the EAR or *
* your obligations under those regulations, please refer to the U.S. Bureau *
* of Industry and Security’s website at http://www.bis.doc.gov/. *
\****************************************************************************/
#ifndef _UTILS_
#define _UTILS_
// Wrapper for malloc
void* alloc(size_t size);
// Checks for existence of directory
void checkDir(char* dirpath);
// Check for existence of file
void checkFile(char* filename);
// Parse the input command line options to the program
void parseArguments(int argc, char** argv, char** input, char** events,
char** ipts, char* devicePref, bool* verifyResults);
// Print the program usage information
void printUsage();
// Rounds up size to the nearest multiple of multiple
unsigned int roundUp(unsigned int value, unsigned int multiple);
// Concatenate two strings, creating a new one
char* smartStrcat(char* str1, char* str2);
// Set the value of usingImages
void setUsingImages(bool val);
// Return whether or not images are being used
bool isUsingImages();
#endif

View file

@ -31,7 +31,8 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=kmeans
PROJECT = kmeans
SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c
all: $(PROJECT).dump $(PROJECT).hex
@ -50,11 +51,11 @@ read_input.o: read_input.c
rmse.o: rmse.c
$(CC) $(CXXFLAGS) -c rmse.c
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -75,4 +76,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -0,0 +1,155 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: cluster.c **/
/** Description: Takes as input a file, containing 1 data point per **/
/** per line, and performs a fuzzy c-means clustering **/
/** on the data. Fuzzy clustering is performed using **/
/** min to max clusters and the clustering that gets **/
/** the best score according to a compactness and **/
/** separation criterion are returned. **/
/** Author: Brendan McCane **/
/** James Cook University of North Queensland. **/
/** Australia. email: mccane@cs.jcu.edu.au **/
/** **/
/** Edited by: Jay Pisharath, Wei-keng Liao **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/** **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
/** University of Virginia **/
/** **/
/** Description: No longer supports fuzzy c-means clustering; **/
/** only regular k-means clustering. **/
/** No longer performs "validity" function to analyze **/
/** compactness and separation crietria; instead **/
/** calculate root mean squared error. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <float.h>
#include "kmeans.h"
float min_rmse_ref = FLT_MAX;
extern double wtime(void);
/* reference min_rmse value */
/*---< cluster() >-----------------------------------------------------------*/
int cluster(int npoints, /* number of data points */
int nfeatures, /* number of attributes for each point */
float **features, /* array: [npoints][nfeatures] */
int min_nclusters, /* range of min to max number of clusters */
int max_nclusters,
float threshold, /* loop terminating factor */
int *best_nclusters, /* out: number between min and max with lowest RMSE */
float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
float *min_rmse, /* out: minimum RMSE */
int isRMSE, /* calculate RMSE */
int nloops /* number of iteration for each number of clusters */
)
{
int nclusters; /* number of clusters k */
int index =0; /* number of iteration to reach the best RMSE */
int rmse; /* RMSE for each clustering */
int *membership; /* which cluster a data point belongs to */
float **tmp_cluster_centres; /* hold coordinates of cluster centers */
int i;
/* allocate memory for membership */
membership = (int*) malloc(npoints * sizeof(int));
/* sweep k from min to max_nclusters to find the best number of clusters */
for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
{
if (nclusters > npoints) break; /* cannot have more clusters than points */
/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
allocate(npoints, nfeatures, nclusters, features);
/* iterate nloops times for each number of clusters */
for(i = 0; i < nloops; i++)
{
/* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
tmp_cluster_centres = kmeans_clustering(features,
nfeatures,
npoints,
nclusters,
threshold,
membership);
if (*cluster_centres) {
free((*cluster_centres)[0]);
free(*cluster_centres);
}
*cluster_centres = tmp_cluster_centres;
/* find the number of clusters with the best RMSE */
if(isRMSE)
{
rmse = rms_err(features,
nfeatures,
npoints,
tmp_cluster_centres,
nclusters);
if(rmse < min_rmse_ref){
min_rmse_ref = rmse; //update reference min RMSE
*min_rmse = min_rmse_ref; //update return min RMSE
*best_nclusters = nclusters; //update optimum number of clusters
index = i; //update number of iteration to reach best RMSE
}
}
}
deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
}
free(membership);
return index;
}

1184
benchmarks/opencl/kmeans/getopt.c Executable file

File diff suppressed because it is too large Load diff

191
benchmarks/opencl/kmeans/getopt.h Executable file
View file

@ -0,0 +1,191 @@
/* getopt.h */
/* Declarations for getopt.
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
Foundation, Inc. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute
it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or
(at your option) any later version.
The GNU C Library is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General
Public License along with the GNU C Library; if not, write
to the Free Software Foundation, Inc., 59 Temple Place,
Suite 330, Boston, MA 02111-1307 USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
/* If __GNU_LIBRARY__ is not already defined, either we are being used
standalone, or this is the first header included in the source file.
If we are being used with glibc, we need to include <features.h>, but
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
not defined, include <ctype.h>, which will pull in <features.h> for us
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
doesn't flood the namespace with stuff the way some other headers do.) */
#if !defined __GNU_LIBRARY__
# include <ctype.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
struct option
{
# if (defined __STDC__ && __STDC__) || defined __cplusplus
const char *name;
# else
char *name;
# endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `--', then non-option arguments are treated as
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
#if (defined __STDC__ && __STDC__) || defined __cplusplus
# ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
# ifndef __need_getopt
extern int getopt_long (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

View file

@ -0,0 +1,61 @@
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
__kernel void
kmeans_kernel_c(__global float *feature,
__global float *clusters,
__global int *membership,
int npoints,
int nclusters,
int nfeatures,
int offset,
int size
)
{
unsigned int point_id = get_global_id(0);
int index = 0;
//const unsigned int point_id = get_global_id(0);
if (point_id < npoints)
{
float min_dist=FLT_MAX;
for (int i=0; i < nclusters; i++) {
float dist = 0;
float ans = 0;
for (int l=0; l<nfeatures; l++){
ans += (feature[l * npoints + point_id]-clusters[i*nfeatures+l])*
(feature[l * npoints + point_id]-clusters[i*nfeatures+l]);
}
dist = ans;
if (dist < min_dist) {
min_dist = dist;
index = i;
}
}
//printf("%d\n", index);
membership[point_id] = index;
}
return;
}
__kernel void
kmeans_swap(__global float *feature,
__global float *feature_swap,
int npoints,
int nfeatures
){
unsigned int tid = get_global_id(0);
//for(int i = 0; i < nfeatures; i++)
// feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
//Lingjie Zhang modificated at 11/05/2015
if (tid < npoints){
for(int i = 0; i < nfeatures; i++)
feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
}
// end of Lingjie Zhang's modification
}

BIN
benchmarks/opencl/kmeans/kmeans Executable file

Binary file not shown.

View file

@ -0,0 +1,65 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
#ifndef _H_FUZZY_KMEANS
#define _H_FUZZY_KMEANS
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
#ifdef __cplusplus
extern "C" {
#endif
float euclid_dist_2 (float*, float*, int);
int find_nearest_point (float* , int, float**, int);
float rms_err(float**, int, int, float**, int);
int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
int setup(int argc, char** argv);
int allocate(int npoints, int nfeatures, int nclusters, float **feature);
void deallocateMemory();
int kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float **new_centers);
float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,176 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: kmeans_clustering.c **/
/** Description: Implementation of regular k-means clustering **/
/** algorithm **/
/** Author: Wei-keng Liao **/
/** ECE Department, Northwestern University **/
/** email: wkliao@ece.northwestern.edu **/
/** **/
/** Edited by: Jay Pisharath **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/** **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
/** University of Virginia **/
/** **/
/** Description: No longer supports fuzzy c-means clustering; **/
/** only regular k-means clustering. **/
/** No longer performs "validity" function to analyze **/
/** compactness and separation crietria; instead **/
/** calculate root mean squared error. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <math.h>
#include "kmeans.h"
#define RANDOM_MAX 2147483647
extern double wtime(void);
/*----< kmeans_clustering() >---------------------------------------------*/
float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */
int nfeatures,
int npoints,
int nclusters,
float threshold,
int *membership) /* out: [npoints] */
{
int i, j, n = 0; /* counters */
int loop=0, temp;
int *new_centers_len; /* [nclusters]: no. of points in each cluster */
float delta; /* if the point moved */
float **clusters; /* out: [nclusters][nfeatures] */
float **new_centers; /* [nclusters][nfeatures] */
int *initial; /* used to hold the index of points not yet selected
prevents the "birthday problem" of dual selection (?)
considered holding initial cluster indices, but changed due to
possible, though unlikely, infinite loops */
int initial_points;
int c = 0;
/* nclusters should never be > npoints
that would guarantee a cluster without points */
if (nclusters > npoints)
nclusters = npoints;
/* allocate space for and initialize returning variable clusters[] */
clusters = (float**) malloc(nclusters * sizeof(float*));
clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float));
for (i=1; i<nclusters; i++)
clusters[i] = clusters[i-1] + nfeatures;
/* initialize the random clusters */
initial = (int *) malloc (npoints * sizeof(int));
for (i = 0; i < npoints; i++)
{
initial[i] = i;
}
initial_points = npoints;
/* randomly pick cluster centers */
for (i=0; i<nclusters && initial_points >= 0; i++) {
//n = (int)rand() % initial_points;
for (j=0; j<nfeatures; j++)
clusters[i][j] = feature[initial[n]][j]; // remapped
/* swap the selected index to the end (not really necessary,
could just move the end up) */
temp = initial[n];
initial[n] = initial[initial_points-1];
initial[initial_points-1] = temp;
initial_points--;
n++;
}
/* initialize the membership to -1 for all */
for (i=0; i < npoints; i++)
membership[i] = -1;
/* allocate space for and initialize new_centers_len and new_centers */
new_centers_len = (int*) calloc(nclusters, sizeof(int));
new_centers = (float**) malloc(nclusters * sizeof(float*));
new_centers[0] = (float*) calloc(nclusters * nfeatures, sizeof(float));
for (i=1; i<nclusters; i++)
new_centers[i] = new_centers[i-1] + nfeatures;
/* iterate until convergence */
do {
delta = 0.0;
// CUDA
delta = (float) kmeansOCL(feature, /* in: [npoints][nfeatures] */
nfeatures, /* number of attributes for each point */
npoints, /* number of data points */
nclusters, /* number of clusters */
membership, /* which cluster the point belongs to */
clusters, /* out: [nclusters][nfeatures] */
new_centers_len, /* out: number of points in each cluster */
new_centers /* sum of points in each cluster */
);
/* replace old cluster centers with new_centers */
/* CPU side of reduction */
for (i=0; i<nclusters; i++) {
for (j=0; j<nfeatures; j++) {
if (new_centers_len[i] > 0)
clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */
new_centers[i][j] = 0.0; /* set back to 0 */
}
new_centers_len[i] = 0; /* set back to 0 */
}
c++;
} while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */
printf("iterated %d times\n", c);
free(new_centers[0]);
free(new_centers);
free(new_centers_len);
return clusters;
}

Binary file not shown.

359
benchmarks/opencl/kmeans/main.cc Executable file
View file

@ -0,0 +1,359 @@
#include "kmeans.h"
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#ifdef WIN
#include <windows.h>
#else
#include <pthread.h>
#include <sys/time.h>
double gettime() {
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_sec + t.tv_usec * 1e-6;
}
#endif
#ifdef NV
#include <oclUtils.h>
#else
#include <CL/cl.h>
#endif
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
#ifdef RD_WG_SIZE_0_0
#define BLOCK_SIZE RD_WG_SIZE_0_0
#elif defined(RD_WG_SIZE_0)
#define BLOCK_SIZE RD_WG_SIZE_0
#elif defined(RD_WG_SIZE)
#define BLOCK_SIZE RD_WG_SIZE
#else
#define BLOCK_SIZE 256
#endif
#ifdef RD_WG_SIZE_1_0
#define BLOCK_SIZE2 RD_WG_SIZE_1_0
#elif defined(RD_WG_SIZE_1)
#define BLOCK_SIZE2 RD_WG_SIZE_1
#elif defined(RD_WG_SIZE)
#define BLOCK_SIZE2 RD_WG_SIZE
#else
#define BLOCK_SIZE2 256
#endif
// local variables
static cl_context context;
static cl_command_queue cmd_queue;
static cl_device_type device_type;
static cl_device_id *device_list;
static cl_int num_devices;
static int initialize(int use_gpu) {
cl_int result;
size_t size;
/*// create OpenCL context
cl_platform_id platform_id;
if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) {
printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
return -1;
}
cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM,
(cl_context_properties)platform_id, 0};
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
if (!context) {
printf("ERROR: clCreateContextFromType(%s) failed\n",
use_gpu ? "GPU" : "CPU");
return -1;
}
// get the list of GPUs
result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
num_devices = (int)(size / sizeof(cl_device_id));
if (result != CL_SUCCESS || num_devices < 1) {
printf("ERROR: clGetContextInfo() failed\n");
return -1;
}
device_list = new cl_device_id[num_devices];
if (!device_list) {
printf("ERROR: new cl_device_id[] failed\n");
return -1;
}
result =
clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
if (result != CL_SUCCESS) {
printf("ERROR: clGetContextInfo() failed\n");
return -1;
}*/
cl_platform_id platform_id;
num_devices = 1;
device_list = new cl_device_id[num_devices];
result = clGetPlatformIDs(1, &platform_id, NULL);
result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL);
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
// create command queue for the first device
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
if (!cmd_queue) {
printf("ERROR: clCreateCommandQueue() failed\n");
return -1;
}
return 0;
}
static int shutdown() {
// release resources
if (cmd_queue)
clReleaseCommandQueue(cmd_queue);
if (context)
clReleaseContext(context);
if (device_list)
delete device_list;
// reset all variables
cmd_queue = 0;
context = 0;
device_list = 0;
num_devices = 0;
device_type = 0;
return 0;
}
cl_mem d_feature;
cl_mem d_feature_swap;
cl_mem d_cluster;
cl_mem d_membership;
cl_kernel kernel;
cl_kernel kernel_s;
cl_kernel kernel2;
int *membership_OCL;
int *membership_d;
float *feature_d;
float *clusters_d;
float *center_d;
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
/*int sourcesize = 1024 * 1024;
char *source = (char *)calloc(sourcesize, sizeof(char));
if (!source) {
printf("ERROR: calloc(%d) failed\n", sourcesize);
return -1;
}
// read the kernel core source
char *tempchar = "./kmeans.cl";
FILE *fp = fopen(tempchar, "rb");
if (!fp) {
printf("ERROR: unable to open '%s'\n", tempchar);
return -1;
}
fread(source + strlen(source), sourcesize, 1, fp);
fclose(fp);*/
// OpenCL initialization
int use_gpu = 1;
if (initialize(use_gpu))
return -1;
// compile kernel
cl_int err = 0;
//const char *slist[2] = {source, 0};
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
return -1;
}
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
{ // show warnings/errors
// static char log[65536]; memset(log, 0, sizeof(log));
// cl_device_id device_id = 0;
// err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id),
//&device_id, NULL);
// clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG,
// sizeof(log)-1, log, NULL);
// if(err || strstr(log,"warning:") || strstr(log, "error:"))
// printf("<<<<\n%s\n>>>>\n", log);
}
if (err != CL_SUCCESS) {
printf("ERROR: clBuildProgram() => %d\n", err);
return -1;
}
char *kernel_kmeans_c = "kmeans_kernel_c";
char *kernel_swap = "kmeans_swap";
kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
return -1;
}
kernel2 = clCreateKernel(prog, kernel_swap, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
return -1;
}
clReleaseProgram(prog);
d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
d_feature_swap =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
d_cluster =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_clusters * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n",
n_clusters * n_features, err);
return -1;
}
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * sizeof(int), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points,
err);
return -1;
}
// write buffers
err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0,
n_points * n_features * sizeof(float), feature[0],
0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature);
clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap);
clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points);
clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features);
size_t global_work[3] = {n_points, 1, 1};
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
size_t local_work_size = BLOCK_SIZE; // work group size is defined by
// RD_WG_SIZE_0 or RD_WG_SIZE_0_0
// 2014/06/10 17:00:51
if (global_work[0] % local_work_size != 0)
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work,
&local_work_size, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
return -1;
}
membership_OCL = (int *)malloc(n_points * sizeof(int));
}
void deallocateMemory() {
clReleaseMemObject(d_feature);
clReleaseMemObject(d_feature_swap);
clReleaseMemObject(d_cluster);
clReleaseMemObject(d_membership);
free(membership_OCL);
}
int main(int argc, char **argv) {
printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n",
BLOCK_SIZE, BLOCK_SIZE2);
setup(argc, argv);
shutdown();
}
int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */
int n_features, int n_points, int n_clusters, int *membership,
float **clusters, int *new_centers_len, float **new_centers) {
int delta = 0;
int i, j, k;
cl_int err = 0;
size_t global_work[3] = {n_points, 1, 1};
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
size_t local_work_size = BLOCK_SIZE2; // work group size is defined by
// RD_WG_SIZE_1 or RD_WG_SIZE_1_0
// 2014/06/10 17:00:41
if (global_work[0] % local_work_size != 0)
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0,
n_clusters * n_features * sizeof(float),
clusters[0], 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points,
err);
return -1;
}
int size = 0;
int offset = 0;
clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap);
clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster);
clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership);
clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points);
clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters);
clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features);
clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset);
clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size);
err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work,
&local_work_size, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
return -1;
}
clFinish(cmd_queue);
err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0,
n_points * sizeof(int), membership_OCL, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: Memcopy Out\n");
return -1;
}
delta = 0;
for (i = 0; i < n_points; i++) {
int cluster_id = membership_OCL[i];
new_centers_len[cluster_id]++;
if (membership_OCL[i] != membership[i]) {
delta++;
membership[i] = membership_OCL[i];
}
for (j = 0; j < n_features; j++) {
new_centers[cluster_id][j] += feature[i][j];
}
}
return delta;
}

View file

@ -0,0 +1,338 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: example.c **/
/** Description: Takes as input a file: **/
/** ascii file: containing 1 data point per line **/
/** binary file: first int is the number of objects **/
/** 2nd int is the no. of features of each **/
/** object **/
/** This example performs a fuzzy c-means clustering **/
/** on the data. Fuzzy clustering is performed using **/
/** min to max clusters and the clustering that gets **/
/** the best score according to a compactness and **/
/** separation criterion are returned. **/
/** Author: Wei-keng Liao **/
/** ECE Department Northwestern University **/
/** email: wkliao@ece.northwestern.edu **/
/** **/
/** Edited by: Jay Pisharath **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/**
* **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee
* **/
/** University of Virginia
* **/
/**
* **/
/** Description: No longer supports fuzzy c-means clustering;
* **/
/** only regular k-means clustering.
* **/
/** No longer performs "validity" function to
* analyze **/
/** compactness and separation crietria; instead
* **/
/** calculate root mean squared error.
* **/
/** **/
/*************************************************************************/
#define _CRT_SECURE_NO_DEPRECATE 1
#include "kmeans.h"
#include <fcntl.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
extern double wtime(void);
/*---< usage() >------------------------------------------------------------*/
void usage(char *argv0) {
char *help = "\nUsage: %s [switches] -i filename\n\n"
" -i filename :file containing data to be clustered\n"
" -m max_nclusters :maximum number of clusters allowed "
"[default=5]\n"
" -n min_nclusters :minimum number of clusters allowed "
"[default=5]\n"
" -t threshold :threshold value "
"[default=0.001]\n"
" -l nloops :iteration for each number of clusters "
"[default=1]\n"
" -b :input file is in binary format\n"
" -r :calculate RMSE "
"[default=off]\n"
" -o :output cluster center coordinates "
"[default=off]\n";
fprintf(stderr, help, argv0);
exit(-1);
}
/*---< main() >-------------------------------------------------------------*/
int setup(int argc, char **argv) {
int opt;
extern char *optarg;
char *filename = 0;
float *buf;
char line[1024];
int isBinaryFile = 0;
float threshold = 0.001; /* default value */
int max_nclusters = 5; /* default value */
int min_nclusters = 5; /* default value */
int best_nclusters = 0;
int nfeatures = 0;
int npoints = 0;
float len;
float **features;
float **cluster_centres = NULL;
int i, j, index;
int nloops = 1; /* default value */
int isRMSE = 0;
float rmse;
int isOutput = 0;
// float cluster_timing, io_timing;
/* obtain command line arguments and change appropriate options */
while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) {
switch (opt) {
case 'i':
filename = optarg;
break;
case 'b':
isBinaryFile = 1;
break;
case 't':
threshold = atof(optarg);
break;
case 'm':
max_nclusters = atoi(optarg);
break;
case 'n':
min_nclusters = atoi(optarg);
break;
case 'r':
isRMSE = 1;
break;
case 'o':
isOutput = 1;
break;
case 'l':
nloops = atoi(optarg);
break;
case '?':
usage(argv[0]);
break;
default:
usage(argv[0]);
break;
}
}
/* ============== I/O begin ==============*/
/* get nfeatures and npoints */
// io_timing = omp_get_wtime();
/*if (isBinaryFile) { // Binary file input
FILE *infile;
if ((infile = fopen("100", "r")) == NULL) {
fprintf(stderr, "Error: no such file (%s)\n", filename);
exit(1);
}
fread(&npoints, 1, sizeof(int), infile);
fread(&nfeatures, 1, sizeof(int), infile);
// allocate space for features[][] and read attributes of all objects
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++) {
features[i] = features[i - 1] + nfeatures;
}
fread(buf, 1, npoints * nfeatures * sizeof(float), infile);
fclose(infile);
} else {
FILE *infile;
if ((infile = fopen("100", "r")) == NULL) {
fprintf(stderr, "Error: no such file (%s)\n", filename);
exit(1);
}
while (fgets(line, 1024, infile) != NULL)
if (strtok(line, " \t\n") != 0) {
npoints++;
}
rewind(infile);
while (fgets(line, 1024, infile) != NULL) {
if (strtok(line, " \t\n") != 0) {
// ignore the id (first attribute): nfeatures = 1;
while (strtok(NULL, " ,\t\n") != NULL)
nfeatures++;
break;
}
}
// allocate space for features[] and read attributes of all objects
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++)
features[i] = features[i - 1] + nfeatures;
rewind(infile);
i = 0;
while (fgets(line, 1024, infile) != NULL) {
if (strtok(line, " \t\n") == NULL)
continue;
for (j = 0; j < nfeatures; j++) {
buf[i] = atof(strtok(NULL, " ,\t\n"));
i++;
}
}
fclose(infile);
}*/
npoints = 100;
nfeatures = 100;
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++) {
features[i] = features[i - 1] + nfeatures;
}
for (i = 0; i < npoints * nfeatures; ++i) {
buf[i] = (i % 64);
}
// io_timing = omp_get_wtime() - io_timing;
printf("\nI/O completed\n");
printf("\nNumber of objects: %d\n", npoints);
printf("Number of features: %d\n", nfeatures);
/* ============== I/O end ==============*/
// error check for clusters
if (npoints < min_nclusters) {
printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n",
min_nclusters, npoints);
exit(0);
}
srand(7); /* seed for future random number generator */
memcpy(
features[0], buf,
npoints * nfeatures *
sizeof(
float)); /* now features holds 2-dimensional array of features */
free(buf);
/* ======================= core of the clustering ===================*/
// cluster_timing = omp_get_wtime(); /* Total clustering time */
cluster_centres = NULL;
index = cluster(npoints, /* number of data points */
nfeatures, /* number of features for each point */
features, /* array: [npoints][nfeatures] */
min_nclusters, /* range of min to max number of clusters */
max_nclusters, threshold, /* loop termination factor */
&best_nclusters, /* return: number between min and max */
&cluster_centres, /* return: [best_nclusters][nfeatures] */
&rmse, /* Root Mean Squared Error */
isRMSE, /* calculate RMSE */
nloops); /* number of iteration for each number of clusters */
// cluster_timing = omp_get_wtime() - cluster_timing;
/* =============== Command Line Output =============== */
/* cluster center coordinates
:displayed only for when k=1*/
if ((min_nclusters == max_nclusters) && (isOutput == 1)) {
printf("\n================= Centroid Coordinates =================\n");
for (i = 0; i < max_nclusters; i++) {
printf("%d:", i);
for (j = 0; j < nfeatures; j++) {
printf(" %.2f", cluster_centres[i][j]);
}
printf("\n\n");
}
}
len = (float)((max_nclusters - min_nclusters + 1) * nloops);
printf("Number of Iteration: %d\n", nloops);
// printf("Time for I/O: %.5fsec\n", io_timing);
// printf("Time for Entire Clustering: %.5fsec\n", cluster_timing);
if (min_nclusters != max_nclusters) {
if (nloops != 1) { // range of k, multiple iteration
// printf("Average Clustering Time: %fsec\n",
// cluster_timing / len);
printf("Best number of clusters is %d\n", best_nclusters);
} else { // range of k, single iteration
// printf("Average Clustering Time: %fsec\n",
// cluster_timing / len);
printf("Best number of clusters is %d\n", best_nclusters);
}
} else {
if (nloops != 1) { // single k, multiple iteration
// printf("Average Clustering Time: %.5fsec\n",
// cluster_timing / nloops);
if (isRMSE) // if calculated RMSE
printf("Number of trials to approach the best RMSE of %.3f is %d\n",
rmse, index + 1);
} else { // single k, single iteration
if (isRMSE) // if calculated RMSE
printf("Root Mean Squared Error: %.3f\n", rmse);
}
}
/* free up memory */
free(features[0]);
free(features);
return (0);
}

94
benchmarks/opencl/kmeans/rmse.c Executable file
View file

@ -0,0 +1,94 @@
/*************************************************************************/
/** File: rmse.c **/
/** Description: calculate root mean squared error of particular **/
/** clustering. **/
/** Author: Sang-Ha Lee **/
/** University of Virginia. **/
/** **/
/** Note: euclid_dist_2() and find_nearest_point() adopted from **/
/** Minebench code. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <math.h>
#include "kmeans.h"
extern double wtime(void);
/*----< euclid_dist_2() >----------------------------------------------------*/
/* multi-dimensional spatial Euclid distance square */
__inline
float euclid_dist_2(float *pt1,
float *pt2,
int numdims)
{
int i;
float ans=0.0;
for (i=0; i<numdims; i++)
ans += (pt1[i]-pt2[i]) * (pt1[i]-pt2[i]);
return(ans);
}
/*----< find_nearest_point() >-----------------------------------------------*/
__inline
int find_nearest_point(float *pt, /* [nfeatures] */
int nfeatures,
float **pts, /* [npts][nfeatures] */
int npts)
{
int index, i;
float max_dist=FLT_MAX;
/* find the cluster center id with min distance to pt */
for (i=0; i<npts; i++) {
float dist;
dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
if (dist < max_dist) {
max_dist = dist;
index = i;
}
}
return(index);
}
/*----< rms_err(): calculates RMSE of clustering >-------------------------------------*/
float rms_err (float **feature, /* [npoints][nfeatures] */
int nfeatures,
int npoints,
float **cluster_centres, /* [nclusters][nfeatures] */
int nclusters)
{
int i;
int nearest_cluster_index; /* cluster center id with min distance to pt */
float sum_euclid = 0.0; /* sum of Euclidean distance squares */
float ret; /* return value */
/* calculate and sum the sqaure of euclidean distance*/
#pragma omp parallel for \
shared(feature,cluster_centres) \
firstprivate(npoints,nfeatures,nclusters) \
private(i, nearest_cluster_index) \
schedule (static)
for (i=0; i<npoints; i++) {
nearest_cluster_index = find_nearest_point(feature[i],
nfeatures,
cluster_centres,
nclusters);
sum_euclid += euclid_dist_2(feature[i],
cluster_centres[nearest_cluster_index],
nfeatures);
}
/* divide by n, then take sqrt */
ret = sqrt(sum_euclid / npoints);
return(ret);
}

1
benchmarks/opencl/kmeans/run Executable file
View file

@ -0,0 +1 @@
./kmeans -o -i ../../data/kmeans/kdd_cup

View file

@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=saxpy
PROJECT = saxpy
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -153,7 +153,7 @@ int main(int argc, char **argv) {
}
cl_event kernel_completion;
size_t global_work_size[1] = {NUM_DATA};
size_t global_work_size[1] = {NUM_DATA/2,NUM_DATA/2};
printf("attempting to enqueue kernel\n");
fflush(stdout);
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,

View file

@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=sfilter
PROJECT = sfilter
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=sgemm
PROJECT = sgemm
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH)
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=vecadd
PROJECT = vecadd
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug