Merge branch 'fpga_synthesis' of https://github.gatech.edu/casl/Vortex into fpga_synthesis

This commit is contained in:
felsabbagh3 2020-04-02 19:19:31 -07:00
commit 478c3cf21d
38 changed files with 3716 additions and 86952 deletions

View file

@ -0,0 +1,44 @@
LLVM_LIB_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(POCLRT_PATH)/include
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
PROJECT = kmeans
SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-ase: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
rm -rf $(PROJECT) *.o *.dump .depend
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View file

View file

@ -0,0 +1,155 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: cluster.c **/
/** Description: Takes as input a file, containing 1 data point per **/
/** per line, and performs a fuzzy c-means clustering **/
/** on the data. Fuzzy clustering is performed using **/
/** min to max clusters and the clustering that gets **/
/** the best score according to a compactness and **/
/** separation criterion are returned. **/
/** Author: Brendan McCane **/
/** James Cook University of North Queensland. **/
/** Australia. email: mccane@cs.jcu.edu.au **/
/** **/
/** Edited by: Jay Pisharath, Wei-keng Liao **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/** **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
/** University of Virginia **/
/** **/
/** Description: No longer supports fuzzy c-means clustering; **/
/** only regular k-means clustering. **/
/** No longer performs "validity" function to analyze **/
/** compactness and separation crietria; instead **/
/** calculate root mean squared error. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <float.h>
#include "kmeans.h"
float min_rmse_ref = FLT_MAX;
extern double wtime(void);
/* reference min_rmse value */
/*---< cluster() >-----------------------------------------------------------*/
int cluster(int npoints, /* number of data points */
int nfeatures, /* number of attributes for each point */
float **features, /* array: [npoints][nfeatures] */
int min_nclusters, /* range of min to max number of clusters */
int max_nclusters,
float threshold, /* loop terminating factor */
int *best_nclusters, /* out: number between min and max with lowest RMSE */
float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
float *min_rmse, /* out: minimum RMSE */
int isRMSE, /* calculate RMSE */
int nloops /* number of iteration for each number of clusters */
)
{
int nclusters; /* number of clusters k */
int index =0; /* number of iteration to reach the best RMSE */
int rmse; /* RMSE for each clustering */
int *membership; /* which cluster a data point belongs to */
float **tmp_cluster_centres; /* hold coordinates of cluster centers */
int i;
/* allocate memory for membership */
membership = (int*) malloc(npoints * sizeof(int));
/* sweep k from min to max_nclusters to find the best number of clusters */
for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
{
if (nclusters > npoints) break; /* cannot have more clusters than points */
/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
allocate(npoints, nfeatures, nclusters, features);
/* iterate nloops times for each number of clusters */
for(i = 0; i < nloops; i++)
{
/* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
tmp_cluster_centres = kmeans_clustering(features,
nfeatures,
npoints,
nclusters,
threshold,
membership);
if (*cluster_centres) {
free((*cluster_centres)[0]);
free(*cluster_centres);
}
*cluster_centres = tmp_cluster_centres;
/* find the number of clusters with the best RMSE */
if(isRMSE)
{
rmse = rms_err(features,
nfeatures,
npoints,
tmp_cluster_centres,
nclusters);
if(rmse < min_rmse_ref){
min_rmse_ref = rmse; //update reference min RMSE
*min_rmse = min_rmse_ref; //update return min RMSE
*best_nclusters = nclusters; //update optimum number of clusters
index = i; //update number of iteration to reach best RMSE
}
}
}
deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
}
free(membership);
return index;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,191 @@
/* getopt.h */
/* Declarations for getopt.
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
Foundation, Inc. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute
it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or
(at your option) any later version.
The GNU C Library is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General
Public License along with the GNU C Library; if not, write
to the Free Software Foundation, Inc., 59 Temple Place,
Suite 330, Boston, MA 02111-1307 USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
/* If __GNU_LIBRARY__ is not already defined, either we are being used
standalone, or this is the first header included in the source file.
If we are being used with glibc, we need to include <features.h>, but
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
not defined, include <ctype.h>, which will pull in <features.h> for us
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
doesn't flood the namespace with stuff the way some other headers do.) */
#if !defined __GNU_LIBRARY__
# include <ctype.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
struct option
{
# if (defined __STDC__ && __STDC__) || defined __cplusplus
const char *name;
# else
char *name;
# endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `--', then non-option arguments are treated as
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
#if (defined __STDC__ && __STDC__) || defined __cplusplus
# ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
# ifndef __need_getopt
extern int getopt_long (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

View file

@ -0,0 +1,61 @@
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
__kernel void
kmeans_kernel_c(__global float *feature,
__global float *clusters,
__global int *membership,
int npoints,
int nclusters,
int nfeatures,
int offset,
int size
)
{
unsigned int point_id = get_global_id(0);
int index = 0;
//const unsigned int point_id = get_global_id(0);
if (point_id < npoints)
{
float min_dist=FLT_MAX;
for (int i=0; i < nclusters; i++) {
float dist = 0;
float ans = 0;
for (int l=0; l<nfeatures; l++){
ans += (feature[l * npoints + point_id]-clusters[i*nfeatures+l])*
(feature[l * npoints + point_id]-clusters[i*nfeatures+l]);
}
dist = ans;
if (dist < min_dist) {
min_dist = dist;
index = i;
}
}
//printf("%d\n", index);
membership[point_id] = index;
}
return;
}
__kernel void
kmeans_swap(__global float *feature,
__global float *feature_swap,
int npoints,
int nfeatures
){
unsigned int tid = get_global_id(0);
//for(int i = 0; i < nfeatures; i++)
// feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
//Lingjie Zhang modificated at 11/05/2015
if (tid < npoints){
for(int i = 0; i < nfeatures; i++)
feature_swap[i * npoints + tid] = feature[tid * nfeatures + i];
}
// end of Lingjie Zhang's modification
}

Binary file not shown.

View file

@ -0,0 +1,65 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
#ifndef _H_FUZZY_KMEANS
#define _H_FUZZY_KMEANS
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
#ifdef __cplusplus
extern "C" {
#endif
float euclid_dist_2 (float*, float*, int);
int find_nearest_point (float* , int, float**, int);
float rms_err(float**, int, int, float**, int);
int cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
int setup(int argc, char** argv);
int allocate(int npoints, int nfeatures, int nclusters, float **feature);
void deallocateMemory();
int kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float **new_centers);
float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,176 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: kmeans_clustering.c **/
/** Description: Implementation of regular k-means clustering **/
/** algorithm **/
/** Author: Wei-keng Liao **/
/** ECE Department, Northwestern University **/
/** email: wkliao@ece.northwestern.edu **/
/** **/
/** Edited by: Jay Pisharath **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/** **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
/** University of Virginia **/
/** **/
/** Description: No longer supports fuzzy c-means clustering; **/
/** only regular k-means clustering. **/
/** No longer performs "validity" function to analyze **/
/** compactness and separation crietria; instead **/
/** calculate root mean squared error. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <math.h>
#include "kmeans.h"
#define RANDOM_MAX 2147483647
extern double wtime(void);
/*----< kmeans_clustering() >---------------------------------------------*/
float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */
int nfeatures,
int npoints,
int nclusters,
float threshold,
int *membership) /* out: [npoints] */
{
int i, j, n = 0; /* counters */
int loop=0, temp;
int *new_centers_len; /* [nclusters]: no. of points in each cluster */
float delta; /* if the point moved */
float **clusters; /* out: [nclusters][nfeatures] */
float **new_centers; /* [nclusters][nfeatures] */
int *initial; /* used to hold the index of points not yet selected
prevents the "birthday problem" of dual selection (?)
considered holding initial cluster indices, but changed due to
possible, though unlikely, infinite loops */
int initial_points;
int c = 0;
/* nclusters should never be > npoints
that would guarantee a cluster without points */
if (nclusters > npoints)
nclusters = npoints;
/* allocate space for and initialize returning variable clusters[] */
clusters = (float**) malloc(nclusters * sizeof(float*));
clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float));
for (i=1; i<nclusters; i++)
clusters[i] = clusters[i-1] + nfeatures;
/* initialize the random clusters */
initial = (int *) malloc (npoints * sizeof(int));
for (i = 0; i < npoints; i++)
{
initial[i] = i;
}
initial_points = npoints;
/* randomly pick cluster centers */
for (i=0; i<nclusters && initial_points >= 0; i++) {
//n = (int)rand() % initial_points;
for (j=0; j<nfeatures; j++)
clusters[i][j] = feature[initial[n]][j]; // remapped
/* swap the selected index to the end (not really necessary,
could just move the end up) */
temp = initial[n];
initial[n] = initial[initial_points-1];
initial[initial_points-1] = temp;
initial_points--;
n++;
}
/* initialize the membership to -1 for all */
for (i=0; i < npoints; i++)
membership[i] = -1;
/* allocate space for and initialize new_centers_len and new_centers */
new_centers_len = (int*) calloc(nclusters, sizeof(int));
new_centers = (float**) malloc(nclusters * sizeof(float*));
new_centers[0] = (float*) calloc(nclusters * nfeatures, sizeof(float));
for (i=1; i<nclusters; i++)
new_centers[i] = new_centers[i-1] + nfeatures;
/* iterate until convergence */
do {
delta = 0.0;
// CUDA
delta = (float) kmeansOCL(feature, /* in: [npoints][nfeatures] */
nfeatures, /* number of attributes for each point */
npoints, /* number of data points */
nclusters, /* number of clusters */
membership, /* which cluster the point belongs to */
clusters, /* out: [nclusters][nfeatures] */
new_centers_len, /* out: number of points in each cluster */
new_centers /* sum of points in each cluster */
);
/* replace old cluster centers with new_centers */
/* CPU side of reduction */
for (i=0; i<nclusters; i++) {
for (j=0; j<nfeatures; j++) {
if (new_centers_len[i] > 0)
clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */
new_centers[i][j] = 0.0; /* set back to 0 */
}
new_centers_len[i] = 0; /* set back to 0 */
}
c++;
} while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */
printf("iterated %d times\n", c);
free(new_centers[0]);
free(new_centers);
free(new_centers_len);
return clusters;
}

Binary file not shown.

View file

@ -0,0 +1,382 @@
#include "kmeans.h"
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#ifdef WIN
#include <windows.h>
#else
#include <pthread.h>
#include <sys/time.h>
double gettime() {
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_sec + t.tv_usec * 1e-6;
}
#endif
#ifdef NV
#include <oclUtils.h>
#else
#include <CL/cl.h>
#endif
#ifndef FLT_MAX
#define FLT_MAX 3.40282347e+38
#endif
#ifdef RD_WG_SIZE_0_0
#define BLOCK_SIZE RD_WG_SIZE_0_0
#elif defined(RD_WG_SIZE_0)
#define BLOCK_SIZE RD_WG_SIZE_0
#elif defined(RD_WG_SIZE)
#define BLOCK_SIZE RD_WG_SIZE
#else
#define BLOCK_SIZE 256
#endif
#ifdef RD_WG_SIZE_1_0
#define BLOCK_SIZE2 RD_WG_SIZE_1_0
#elif defined(RD_WG_SIZE_1)
#define BLOCK_SIZE2 RD_WG_SIZE_1
#elif defined(RD_WG_SIZE)
#define BLOCK_SIZE2 RD_WG_SIZE
#else
#define BLOCK_SIZE2 256
#endif
// local variables
static cl_context context;
static cl_command_queue cmd_queue;
static cl_device_type device_type;
static cl_device_id *device_list;
static cl_int num_devices;
static int initialize(int use_gpu) {
cl_int result;
size_t size;
/*// create OpenCL context
cl_platform_id platform_id;
if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) {
printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
return -1;
}
cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM,
(cl_context_properties)platform_id, 0};
device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL);
if (!context) {
printf("ERROR: clCreateContextFromType(%s) failed\n",
use_gpu ? "GPU" : "CPU");
return -1;
}
// get the list of GPUs
result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
num_devices = (int)(size / sizeof(cl_device_id));
if (result != CL_SUCCESS || num_devices < 1) {
printf("ERROR: clGetContextInfo() failed\n");
return -1;
}
device_list = new cl_device_id[num_devices];
if (!device_list) {
printf("ERROR: new cl_device_id[] failed\n");
return -1;
}
result =
clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
if (result != CL_SUCCESS) {
printf("ERROR: clGetContextInfo() failed\n");
return -1;
}*/
cl_platform_id platform_id;
num_devices = 1;
device_list = new cl_device_id[num_devices];
result = clGetPlatformIDs(1, &platform_id, NULL);
result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL);
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
// create command queue for the first device
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
if (!cmd_queue) {
printf("ERROR: clCreateCommandQueue() failed\n");
return -1;
}
return 0;
}
static int shutdown() {
// release resources
if (cmd_queue)
clReleaseCommandQueue(cmd_queue);
if (context)
clReleaseContext(context);
if (device_list)
delete device_list;
// reset all variables
cmd_queue = 0;
context = 0;
device_list = 0;
num_devices = 0;
device_type = 0;
return 0;
}
cl_mem d_feature;
cl_mem d_feature_swap;
cl_mem d_cluster;
cl_mem d_membership;
cl_kernel kernel;
cl_kernel kernel_s;
cl_kernel kernel2;
int *membership_OCL;
int *membership_d;
float *feature_d;
float *clusters_d;
float *center_d;
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
/*int sourcesize = 1024 * 1024;
char *source = (char *)calloc(sourcesize, sizeof(char));
if (!source) {
printf("ERROR: calloc(%d) failed\n", sourcesize);
return -1;
}
// read the kernel core source
char *tempchar = "./kmeans.cl";
FILE *fp = fopen(tempchar, "rb");
if (!fp) {
printf("ERROR: unable to open '%s'\n", tempchar);
return -1;
}
fread(source + strlen(source), sourcesize, 1, fp);
fclose(fp);*/
// OpenCL initialization
int use_gpu = 1;
if (initialize(use_gpu))
return -1;
// compile kernel
cl_int err = 0;
//const char *slist[2] = {source, 0};
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
return -1;
}
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
{ // show warnings/errors
// static char log[65536]; memset(log, 0, sizeof(log));
// cl_device_id device_id = 0;
// err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id),
//&device_id, NULL);
// clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG,
// sizeof(log)-1, log, NULL);
// if(err || strstr(log,"warning:") || strstr(log, "error:"))
// printf("<<<<\n%s\n>>>>\n", log);
}
if (err != CL_SUCCESS) {
printf("ERROR: clBuildProgram() => %d\n", err);
return -1;
}
char *kernel_kmeans_c = "kmeans_kernel_c";
char *kernel_swap = "kmeans_swap";
kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
return -1;
}
kernel2 = clCreateKernel(prog, kernel_swap, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
return -1;
}
clReleaseProgram(prog);
d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
d_feature_swap =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
d_cluster =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_clusters * n_features * sizeof(float), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n",
n_clusters * n_features, err);
return -1;
}
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * sizeof(int), NULL, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points,
err);
return -1;
}
// write buffers
err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0,
n_points * n_features * sizeof(float), feature[0],
0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n",
n_points * n_features, err);
return -1;
}
clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature);
clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap);
clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points);
clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features);
size_t global_work[3] = {n_points, 1, 1};
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
size_t local_work_size = BLOCK_SIZE; // work group size is defined by
// RD_WG_SIZE_0 or RD_WG_SIZE_0_0
// 2014/06/10 17:00:51
if (global_work[0] % local_work_size != 0)
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work,
&local_work_size, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
return -1;
}
membership_OCL = (int *)malloc(n_points * sizeof(int));
}
void deallocateMemory() {
clReleaseMemObject(d_feature);
clReleaseMemObject(d_feature_swap);
clReleaseMemObject(d_cluster);
clReleaseMemObject(d_membership);
free(membership_OCL);
}
int main(int argc, char **argv) {
printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n",
BLOCK_SIZE, BLOCK_SIZE2);
setup(argc, argv);
shutdown();
}
int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */
int n_features, int n_points, int n_clusters, int *membership,
float **clusters, int *new_centers_len, float **new_centers) {
int delta = 0;
int i, j, k;
cl_int err = 0;
size_t global_work[3] = {n_points, 1, 1};
/// Ke Wang adjustable local group size 2013/08/07 10:37:33
size_t local_work_size = BLOCK_SIZE2; // work group size is defined by
// RD_WG_SIZE_1 or RD_WG_SIZE_1_0
// 2014/06/10 17:00:41
if (global_work[0] % local_work_size != 0)
global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size;
err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0,
n_clusters * n_features * sizeof(float),
clusters[0], 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points,
err);
return -1;
}
int size = 0;
int offset = 0;
clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap);
clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster);
clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership);
clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points);
clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters);
clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features);
clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset);
clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size);
err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work,
&local_work_size, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err);
return -1;
}
clFinish(cmd_queue);
err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0,
n_points * sizeof(int), membership_OCL, 0, 0, 0);
if (err != CL_SUCCESS) {
printf("ERROR: Memcopy Out\n");
return -1;
}
delta = 0;
for (i = 0; i < n_points; i++) {
int cluster_id = membership_OCL[i];
new_centers_len[cluster_id]++;
if (membership_OCL[i] != membership[i]) {
delta++;
membership[i] = membership_OCL[i];
}
for (j = 0; j < n_features; j++) {
new_centers[cluster_id][j] += feature[i][j];
}
}
return delta;
}

View file

@ -0,0 +1,338 @@
/*****************************************************************************/
/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
/*By downloading, copying, installing or using the software you agree */
/*to this license. If you do not agree to this license, do not download, */
/*install, copy or use the software. */
/* */
/* */
/*Copyright (c) 2005 Northwestern University */
/*All rights reserved. */
/*Redistribution of the software in source and binary forms, */
/*with or without modification, is permitted provided that the */
/*following conditions are met: */
/* */
/*1 Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/*2 Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution.*/
/* */
/*3 Neither the name of Northwestern University nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/*POSSIBILITY OF SUCH DAMAGE. */
/******************************************************************************/
/*************************************************************************/
/** File: example.c **/
/** Description: Takes as input a file: **/
/** ascii file: containing 1 data point per line **/
/** binary file: first int is the number of objects **/
/** 2nd int is the no. of features of each **/
/** object **/
/** This example performs a fuzzy c-means clustering **/
/** on the data. Fuzzy clustering is performed using **/
/** min to max clusters and the clustering that gets **/
/** the best score according to a compactness and **/
/** separation criterion are returned. **/
/** Author: Wei-keng Liao **/
/** ECE Department Northwestern University **/
/** email: wkliao@ece.northwestern.edu **/
/** **/
/** Edited by: Jay Pisharath **/
/** Northwestern University. **/
/** **/
/** ================================================================ **/
/**
* **/
/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee
* **/
/** University of Virginia
* **/
/**
* **/
/** Description: No longer supports fuzzy c-means clustering;
* **/
/** only regular k-means clustering.
* **/
/** No longer performs "validity" function to
* analyze **/
/** compactness and separation crietria; instead
* **/
/** calculate root mean squared error.
* **/
/** **/
/*************************************************************************/
#define _CRT_SECURE_NO_DEPRECATE 1
#include "kmeans.h"
#include <fcntl.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
extern double wtime(void);
/*---< usage() >------------------------------------------------------------*/
void usage(char *argv0) {
char *help = "\nUsage: %s [switches] -i filename\n\n"
" -i filename :file containing data to be clustered\n"
" -m max_nclusters :maximum number of clusters allowed "
"[default=5]\n"
" -n min_nclusters :minimum number of clusters allowed "
"[default=5]\n"
" -t threshold :threshold value "
"[default=0.001]\n"
" -l nloops :iteration for each number of clusters "
"[default=1]\n"
" -b :input file is in binary format\n"
" -r :calculate RMSE "
"[default=off]\n"
" -o :output cluster center coordinates "
"[default=off]\n";
fprintf(stderr, help, argv0);
exit(-1);
}
/*---< main() >-------------------------------------------------------------*/
int setup(int argc, char **argv) {
int opt;
extern char *optarg;
char *filename = 0;
float *buf;
char line[1024];
int isBinaryFile = 0;
float threshold = 0.001; /* default value */
int max_nclusters = 5; /* default value */
int min_nclusters = 5; /* default value */
int best_nclusters = 0;
int nfeatures = 0;
int npoints = 0;
float len;
float **features;
float **cluster_centres = NULL;
int i, j, index;
int nloops = 1; /* default value */
int isRMSE = 0;
float rmse;
int isOutput = 0;
// float cluster_timing, io_timing;
/* obtain command line arguments and change appropriate options */
while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) {
switch (opt) {
case 'i':
filename = optarg;
break;
case 'b':
isBinaryFile = 1;
break;
case 't':
threshold = atof(optarg);
break;
case 'm':
max_nclusters = atoi(optarg);
break;
case 'n':
min_nclusters = atoi(optarg);
break;
case 'r':
isRMSE = 1;
break;
case 'o':
isOutput = 1;
break;
case 'l':
nloops = atoi(optarg);
break;
case '?':
usage(argv[0]);
break;
default:
usage(argv[0]);
break;
}
}
/* ============== I/O begin ==============*/
/* get nfeatures and npoints */
// io_timing = omp_get_wtime();
/*if (isBinaryFile) { // Binary file input
FILE *infile;
if ((infile = fopen("100", "r")) == NULL) {
fprintf(stderr, "Error: no such file (%s)\n", filename);
exit(1);
}
fread(&npoints, 1, sizeof(int), infile);
fread(&nfeatures, 1, sizeof(int), infile);
// allocate space for features[][] and read attributes of all objects
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++) {
features[i] = features[i - 1] + nfeatures;
}
fread(buf, 1, npoints * nfeatures * sizeof(float), infile);
fclose(infile);
} else {
FILE *infile;
if ((infile = fopen("100", "r")) == NULL) {
fprintf(stderr, "Error: no such file (%s)\n", filename);
exit(1);
}
while (fgets(line, 1024, infile) != NULL)
if (strtok(line, " \t\n") != 0) {
npoints++;
}
rewind(infile);
while (fgets(line, 1024, infile) != NULL) {
if (strtok(line, " \t\n") != 0) {
// ignore the id (first attribute): nfeatures = 1;
while (strtok(NULL, " ,\t\n") != NULL)
nfeatures++;
break;
}
}
// allocate space for features[] and read attributes of all objects
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++)
features[i] = features[i - 1] + nfeatures;
rewind(infile);
i = 0;
while (fgets(line, 1024, infile) != NULL) {
if (strtok(line, " \t\n") == NULL)
continue;
for (j = 0; j < nfeatures; j++) {
buf[i] = atof(strtok(NULL, " ,\t\n"));
i++;
}
}
fclose(infile);
}*/
npoints = 100;
nfeatures = 100;
buf = (float *)malloc(npoints * nfeatures * sizeof(float));
features = (float **)malloc(npoints * sizeof(float *));
features[0] = (float *)malloc(npoints * nfeatures * sizeof(float));
for (i = 1; i < npoints; i++) {
features[i] = features[i - 1] + nfeatures;
}
for (i = 0; i < npoints * nfeatures; ++i) {
buf[i] = (i % 64);
}
// io_timing = omp_get_wtime() - io_timing;
printf("\nI/O completed\n");
printf("\nNumber of objects: %d\n", npoints);
printf("Number of features: %d\n", nfeatures);
/* ============== I/O end ==============*/
// error check for clusters
if (npoints < min_nclusters) {
printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n",
min_nclusters, npoints);
exit(0);
}
srand(7); /* seed for future random number generator */
memcpy(
features[0], buf,
npoints * nfeatures *
sizeof(
float)); /* now features holds 2-dimensional array of features */
free(buf);
/* ======================= core of the clustering ===================*/
// cluster_timing = omp_get_wtime(); /* Total clustering time */
cluster_centres = NULL;
index = cluster(npoints, /* number of data points */
nfeatures, /* number of features for each point */
features, /* array: [npoints][nfeatures] */
min_nclusters, /* range of min to max number of clusters */
max_nclusters, threshold, /* loop termination factor */
&best_nclusters, /* return: number between min and max */
&cluster_centres, /* return: [best_nclusters][nfeatures] */
&rmse, /* Root Mean Squared Error */
isRMSE, /* calculate RMSE */
nloops); /* number of iteration for each number of clusters */
// cluster_timing = omp_get_wtime() - cluster_timing;
/* =============== Command Line Output =============== */
/* cluster center coordinates
:displayed only for when k=1*/
if ((min_nclusters == max_nclusters) && (isOutput == 1)) {
printf("\n================= Centroid Coordinates =================\n");
for (i = 0; i < max_nclusters; i++) {
printf("%d:", i);
for (j = 0; j < nfeatures; j++) {
printf(" %.2f", cluster_centres[i][j]);
}
printf("\n\n");
}
}
len = (float)((max_nclusters - min_nclusters + 1) * nloops);
printf("Number of Iteration: %d\n", nloops);
// printf("Time for I/O: %.5fsec\n", io_timing);
// printf("Time for Entire Clustering: %.5fsec\n", cluster_timing);
if (min_nclusters != max_nclusters) {
if (nloops != 1) { // range of k, multiple iteration
// printf("Average Clustering Time: %fsec\n",
// cluster_timing / len);
printf("Best number of clusters is %d\n", best_nclusters);
} else { // range of k, single iteration
// printf("Average Clustering Time: %fsec\n",
// cluster_timing / len);
printf("Best number of clusters is %d\n", best_nclusters);
}
} else {
if (nloops != 1) { // single k, multiple iteration
// printf("Average Clustering Time: %.5fsec\n",
// cluster_timing / nloops);
if (isRMSE) // if calculated RMSE
printf("Number of trials to approach the best RMSE of %.3f is %d\n",
rmse, index + 1);
} else { // single k, single iteration
if (isRMSE) // if calculated RMSE
printf("Root Mean Squared Error: %.3f\n", rmse);
}
}
/* free up memory */
free(features[0]);
free(features);
return (0);
}

View file

@ -0,0 +1,94 @@
/*************************************************************************/
/** File: rmse.c **/
/** Description: calculate root mean squared error of particular **/
/** clustering. **/
/** Author: Sang-Ha Lee **/
/** University of Virginia. **/
/** **/
/** Note: euclid_dist_2() and find_nearest_point() adopted from **/
/** Minebench code. **/
/** **/
/*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <float.h>
#include <math.h>
#include "kmeans.h"
extern double wtime(void);
/*----< euclid_dist_2() >----------------------------------------------------*/
/* multi-dimensional spatial Euclid distance square */
__inline
float euclid_dist_2(float *pt1,
float *pt2,
int numdims)
{
int i;
float ans=0.0;
for (i=0; i<numdims; i++)
ans += (pt1[i]-pt2[i]) * (pt1[i]-pt2[i]);
return(ans);
}
/*----< find_nearest_point() >-----------------------------------------------*/
__inline
int find_nearest_point(float *pt, /* [nfeatures] */
int nfeatures,
float **pts, /* [npts][nfeatures] */
int npts)
{
int index, i;
float max_dist=FLT_MAX;
/* find the cluster center id with min distance to pt */
for (i=0; i<npts; i++) {
float dist;
dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
if (dist < max_dist) {
max_dist = dist;
index = i;
}
}
return(index);
}
/*----< rms_err(): calculates RMSE of clustering >-------------------------------------*/
float rms_err (float **feature, /* [npoints][nfeatures] */
int nfeatures,
int npoints,
float **cluster_centres, /* [nclusters][nfeatures] */
int nclusters)
{
int i;
int nearest_cluster_index; /* cluster center id with min distance to pt */
float sum_euclid = 0.0; /* sum of Euclidean distance squares */
float ret; /* return value */
/* calculate and sum the sqaure of euclidean distance*/
#pragma omp parallel for \
shared(feature,cluster_centres) \
firstprivate(npoints,nfeatures,nclusters) \
private(i, nearest_cluster_index) \
schedule (static)
for (i=0; i<npoints; i++) {
nearest_cluster_index = find_nearest_point(feature[i],
nfeatures,
cluster_centres,
nclusters);
sum_euclid += euclid_dist_2(feature[i],
cluster_centres[nearest_cluster_index],
nfeatures);
}
/* divide by n, then take sqrt */
ret = sqrt(sum_euclid / npoints);
return(ret);
}

View file

@ -55,6 +55,28 @@ static cl_device_type device_type;
static cl_device_id *device_list;
static cl_int num_devices;
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
static int initialize(int use_gpu) {
cl_int result;
size_t size;
@ -147,6 +169,11 @@ float *feature_d;
float *clusters_d;
float *center_d;
uint8_t* kernel_bin = NULL;
size_t kernel_size = 0;
cl_int binary_status = 0;
int allocate(int n_points, int n_features, int n_clusters, float **feature) {
/*int sourcesize = 1024 * 1024;
char *source = (char *)calloc(sourcesize, sizeof(char));
@ -170,11 +197,18 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
if (initialize(use_gpu))
return -1;
// Load Kernel
if (read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) {
return -1;
}
// compile kernel
cl_int err = 0;
//const char *slist[2] = {source, 0};
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
cl_program prog = clCreateProgramWithBinary(
context, 1, device_list, &kernel_size, &kernel_bin, &binary_status, &err);
// cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
return -1;
@ -280,6 +314,7 @@ void deallocateMemory() {
clReleaseMemObject(d_feature_swap);
clReleaseMemObject(d_cluster);
clReleaseMemObject(d_membership);
if (kernel_bin) free(kernel_bin);
free(membership_OCL);
}

View file

@ -1,24 +1,35 @@
BUILD_DIR=build_sim
BUILD_DIR=build_ase
all: ase fpga
ase: build-setup
ase: setup-ase
make -C $(BUILD_DIR)
fpga: build-setup
# TODO
fpga: setup-fpga
cd build_fpga && qsub-synth
build-setup: $(BUILD_DIR)/Makefile
setup-ase: build_ase/Makefile
$(BUILD_DIR)/Makefile:
afu_sim_setup --sources=sources.txt --platform discrete_pcie3 $(BUILD_DIR) -f
setup-fpga: build_fpga/build/dcp.qpf
build_ase/Makefile:
afu_sim_setup --s sources.txt build_ase
build_fpga/build/dcp.qpf:
afu_synth_setup -s sources.txt build_fpga
run-ase:
cd $(BUILD_DIR) && MENT_VSIM_OPT="-dpicpppath /usr/bin/gcc" make sim
cd build_ase && make sim
wave:
vsim -view build_ase/work/vsim.wlf -do wave.do
run-fpga:
# TODO
clean:
rm -rf $(BUILD_DIR)
clean-ase:
rm -rf build_ase
clean-fpga:
rm -rf build_fpga

View file

@ -108,7 +108,7 @@ module ccip_std_afu
#(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
)
hello_mem_afu_inst
vortex_afu_inst
(
.clk (clk),
.SoftReset (reset_T1),

View file

@ -1,5 +1,7 @@
vortex_afu.json
+define+GLOBAL_BLOCK_SIZE_BYTES=64
+incdir+.
+incdir+../../rtl
+incdir+../../rtl/shared_memory
@ -13,6 +15,7 @@ vortex_afu.json
../../rtl/VX_define.v
../../rtl/VX_cache/VX_cache_config.v
../../rtl/Vortex_SOC.v
../../rtl/Vortex_Cluster.v
../../rtl/Vortex.v
../../rtl/VX_front_end.v
../../rtl/VX_back_end.v

View file

@ -3,7 +3,19 @@
"afu-image": {
"power": 0,
"clock-frequency-high": "auto",
"clock-frequency-low": "auto",
"clock-frequency-low": "auto",
"mmio-csr-cmd": 10,
"mmio-csr-status": 12,
"mmio-csr-io-addr": 14,
"mmio-csr-mem-addr": 16,
"mmio-csr-data-size": 18,
"cmd-type-read": 1,
"cmd-type-write": 2,
"cmd-type-run": 3,
"cmd-type-snoop": 4,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,5 @@
#!/bin/bash
source /tools/reconfig/intel/19.3/rg_intel_fpga_end_19.3.sh
export PATH=/tools/opae/1.4.0/bin:/tools/reconfig/intel/19.3/modelsim_ase/bin:$PATH
export LD_LIBRARY_PATH=/tools/opae/1.4.0/lib:$PATH

View file

@ -1,6 +1,9 @@
all: opae rtlsim simx
all: dummy
dummy:
$(MAKE) -C dummy
opae:
$(MAKE) -C opae
@ -12,6 +15,7 @@ simx:
$(MAKE) -C simx
clean:
$(MAKE) clean -C dummy
$(MAKE) clean -C opae
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx

20
driver/sw/dummy/Makefile Normal file
View file

@ -0,0 +1,20 @@
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../../runtime
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
SRCS = vortex.cpp ../vx_utils.cpp
PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
clean:
rm -rf $(PROJECT) obj_dir

View file

@ -0,0 +1,45 @@
#include <vortex.h>
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
return -1;
}
extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
return -1;
}
extern int vx_flush_caches(vx_device_h /*hdevice*/, size_t /*dev_maddr*/, size_t /*size*/) {
return -1;
}
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
extern volatile void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
return nullptr;
}
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
return -1;
}
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
return -1;
}
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
return -1;
}
extern int vx_start(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
return -1;
}

View file

@ -1,7 +1,7 @@
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include
CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../../runtime
LDFLAGS += -L/tools/opae/1.4.0/lib
@ -17,6 +17,8 @@ CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64
LDFLAGS += -luuid
LDFLAGS += -shared
@ -50,7 +52,7 @@ $(PROJECT_ASE): $(SRCS) $(ASE_DIR)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@
vortex.o: vortex.cpp $(AFU_JSON_INFO)
$(CC) $(CXXFLAGS) -c vortex.cpp -o $@
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
$(ASE_DIR):
mkdir -p ase

View file

@ -4,35 +4,35 @@
#include <unistd.h>
#include <assert.h>
#include <uuid/uuid.h>
#include <opae/fpga.h>
#include <vortex.h>
#include "vortex_afu.h"
// MMIO Address Mappings
#define MMIO_COPY_IO_ADDRESS 0X120
#define MMIO_COPY_AVM_ADDRESS 0x100
#define MMIO_COPY_DATA_SIZE 0X118
#define MMIO_CMD_TYPE 0X110
#define MMIO_READY_FOR_CMD 0X198
#define MMIO_CMD_TYPE_READ 0
#define MMIO_CMD_TYPE_WRITE 1
#define MMIO_CMD_TYPE_START 2
#define MMIO_CMD_TYPE_SNOOP 3
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
printf("OPAE Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
#define CMD_TYPE_SNOOP AFU_IMAGE_CMD_TYPE_SNOOP
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4)
#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4)
#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4)
///////////////////////////////////////////////////////////////////////////////
typedef struct vx_device_ {
fpga_handle fpga;
size_t mem_allocation;
@ -42,21 +42,19 @@ typedef struct vx_buffer_ {
uint64_t wsid;
volatile void* host_ptr;
uint64_t io_addr;
fpga_handle fpga;
vx_device_h hdevice;
size_t size;
} vx_buffer_t;
static size_t align_size(size_t size) {
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
return cache_block_size * ((size + cache_block_size - 1) / cache_block_size);
}
///////////////////////////////////////////////////////////////////////////////
// Search for an accelerator matching the requested UUID and connect to it
// Convert this to void if required as storing the fpga_handle to params variable
extern int vx_dev_open(vx_device_h* hdevice) {
fpga_properties filter = NULL;
fpga_properties filter = nullptr;
fpga_result res;
fpga_guid guid;
fpga_token accel_token;
@ -64,11 +62,14 @@ extern int vx_dev_open(vx_device_h* hdevice) {
fpga_handle accel_handle;
vx_device_t* device;
if (NULL == hdevice)
if (nullptr == hdevice)
return -1;
// ensure that the block size 64
assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE));
// Set up a filter that will search for an accelerator
fpgaGetProperties(NULL, &filter);
fpgaGetProperties(nullptr, &filter);
fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
// Add the desired UUID to the filter
@ -84,13 +85,13 @@ extern int vx_dev_open(vx_device_h* hdevice) {
if (num_matches < 1) {
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
return NULL;
return -1;
}
// Open accelerator
res = fpgaOpen(accel_token, &accel_handle, 0);
if (FPGA_OK != res) {
return NULL;
return -1;
}
// Done with token
@ -98,9 +99,9 @@ extern int vx_dev_open(vx_device_h* hdevice) {
// allocate device object
device = (vx_device_t*)malloc(sizeof(vx_device_t));
if (NULL == device) {
if (nullptr == device) {
fpgaClose(accel_handle);
return NULL;
return -1;
}
device->fpga = accel_handle;
@ -111,9 +112,8 @@ extern int vx_dev_open(vx_device_h* hdevice) {
return 0;
}
// Close the fpga when all the operations are done
extern int vx_dev_close(vx_device_h hdevice) {
if (NULL == hdevice)
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
@ -126,15 +126,15 @@ extern int vx_dev_close(vx_device_h hdevice) {
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (NULL == hdevice
|| NULL == dev_maddr
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size);
auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
@ -151,9 +151,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
uint64_t io_addr;
vx_buffer_t* buffer;
if (NULL == hdevice
if (nullptr == hdevice
|| 0 >= size
|| NULL == hbuffer)
|| nullptr == hbuffer)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
@ -174,7 +174,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
// allocate buffer object
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
if (NULL == buffer) {
if (nullptr == buffer) {
fpgaReleaseBuffer(device->fpga, wsid);
return -1;
}
@ -182,7 +182,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
buffer->wsid = wsid;
buffer->host_ptr = host_ptr;
buffer->io_addr = io_addr;
buffer->fpga = device->fpga;
buffer->hdevice = hdevice;
buffer->size = size;
*hbuffer = buffer;
@ -191,136 +191,30 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
}
extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return nullptr;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
if (NULL == buffer)
return NULL;
return buffer->host_ptr;
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
if (NULL == buffer)
if (nullptr == hbuffer)
return -1;
fpgaReleaseBuffer(buffer->fpga, buffer->wsid);
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
fpgaReleaseBuffer(device->fpga, buffer->wsid);
free(buffer);
return 0;
}
// Check if HW is ready for SW
static int ready_for_sw(fpga_handle hdevice) {
uint64_t data = 0;
struct timespec sleep_time;
#ifdef USE_ASE
sleep_time.tv_sec = 1;
sleep_time.tv_nsec = 0;
#else
sleep_time.tv_sec = 0;
sleep_time.tv_nsec = 1000000;
#endif
do {
CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data));
nanosleep(&sleep_time, NULL);
} while (data != 0x1);
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (NULL == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
// bound checking
if (size + src_offset > buffer->size)
return -1;
// Ensure ready for new command
if (ready_for_sw(buffer->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + src_offset);
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE));
// Wait for the write operation to finish
return ready_for_sw(buffer->fpga);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (NULL == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
// bound checking
if (size + dest_offset > buffer->size)
return -1;
// Ensure ready for new command
if (ready_for_sw(buffer->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + dest_offset);
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ));
// Wait for the write operation to finish
return ready_for_sw(buffer->fpga);
}
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
if (NULL == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
// bound checking
if (size + src_offset > buffer->size)
return -1;
// Ensure ready for new command
if (ready_for_sw(buffer->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_SNOOP));
// Wait for the write operation to finish
return ready_for_sw(buffer->fpga);
return 0;
}
extern int vx_start(vx_device_h hdevice) {
if (NULL == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (ready_for_sw(device->fpga) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_START));
return 0;
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
if (NULL == hdevice)
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
@ -328,7 +222,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
uint64_t data = 0;
struct timespec sleep_time;
#ifdef USE_ASE
#if defined(USE_ASE)
sleep_time.tv_sec = 1;
sleep_time.tv_nsec = 0;
#else
@ -339,13 +233,106 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
// to milliseconds
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
do {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_READY_FOR_CMD, &data));
nanosleep(&sleep_time, NULL);
sleep_time_ms -= sleep_time_ms;
if (timeout <= sleep_time_ms)
break;
} while (data != 0x1);
for (;;) {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data));
if (0 == data || 0 == timeout)
break;
nanosleep(&sleep_time, nullptr);
timeout -= sleep_time_ms;
};
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
// bound checking
if (size + src_offset > buffer->size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
// bound checking
if (size + dest_offset > buffer->size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
if (nullptr == hdevice
|| 0 >= size)
return -1;
vx_device_t* device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_SNOOP));
// Wait for the write operation to finish
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
return 0;
}

View file

@ -11,17 +11,6 @@
#include <ram.h>
#include <simulator.h>
#define PAGE_SIZE 4096
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
static size_t align_size(size_t size) {
@ -197,7 +186,7 @@ private:
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_open(vx_device_h* hdevice) {
if (NULL == hdevice)
if (nullptr == hdevice)
return -1;
*hdevice = new vx_device();
@ -217,8 +206,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (NULL == hdevice
|| NULL == dev_maddr
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
@ -227,7 +216,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
}
extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
if (NULL == hdevice
if (nullptr == hdevice
|| 0 >= size)
return -1;
@ -240,7 +229,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| NULL == hbuffer)
|| nullptr == hbuffer)
return -1;
vx_device *device = ((vx_device*)hdevice);

View file

@ -1,7 +1,7 @@
CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime
CFLAGS += -fPIC

View file

@ -13,15 +13,6 @@
#define PAGE_SIZE 4096
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
static size_t align_size(size_t size) {
@ -206,7 +197,7 @@ private:
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_open(vx_device_h* hdevice) {
if (NULL == hdevice)
if (nullptr == hdevice)
return -1;
*hdevice = new vx_device();
@ -226,8 +217,8 @@ extern int vx_dev_close(vx_device_h hdevice) {
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (NULL == hdevice
|| NULL == dev_maddr
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
@ -236,7 +227,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
}
extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t size) {
if (NULL == hdevice
if (nullptr == hdevice
|| 0 >= size)
return -1;
// this functionality is not need by simX
@ -246,7 +237,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t siz
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| NULL == hbuffer)
|| nullptr == hbuffer)
return -1;
vx_device *device = ((vx_device*)hdevice);

View file

@ -35,7 +35,7 @@ extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_
if (NULL == content || 0 == size)
return -1;
uint32_t buffer_transfer_size = 4096;
uint32_t buffer_transfer_size = 65536;
uint32_t kernel_base_addr = vx_dev_caps(VX_CAPS_KERNEL_BASE_ADDR);
// allocate device buffer

View file

@ -1,5 +1,5 @@
CXXFLAGS += -std=c++17 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../../sw/include
@ -12,13 +12,13 @@ SRCS = basic.cpp
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-ase: $(PROJECT)
LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

Binary file not shown.

View file

@ -1,16 +1,20 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <iostream>
#include <unistd.h>
#include <vortex.h>
int test = -1;
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "?")) != -1) {
while ((c = getopt(argc, argv, "t:h?")) != -1) {
switch (c) {
case 't': {
test = atoi(optarg);
} break;
case 'h':
case '?': {
printf("Test.\n");
printf("Usage: [-h: help]\n");
std::cout << "Test." << std::endl;
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
exit(0);
} break;
default:
@ -23,9 +27,13 @@ uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));;
}
int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t value, int num_blocks) {
int err;
int num_failures = 0;
int run_test_0(vx_buffer_h sbuf,
vx_buffer_h dbuf,
uint32_t address,
uint64_t value,
int num_blocks) {
int ret;
int errors = 0;
// write sbuf data
for (int i = 0; i < 8 * num_blocks; ++i) {
@ -33,75 +41,162 @@ int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t valu
}
// write buffer to local memory
err = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
if (err != 0)
return -1;
std::cout << "write buffer to local memory" << std::endl;
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
if (ret != 0)
return ret;
// read buffer from local memory
err = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
if (err != 0)
return -1;
std::cout << "read buffer from local memory" << std::endl;
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
if (ret != 0)
return ret;
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < 8 * num_blocks; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
auto ref = shuffle(i, value);
if (curr != ref) {
printf("error @ %x: actual %ld, expected %ld\n", address + 64 * i, curr, ref);
++num_failures;
std::cout << "error @ " << std::hex << (address + 64 * i)
<< ": actual " << curr << ", expected " << ref << std::endl;
++errors;
}
}
return num_failures;
}
if (errors != 0) {
std::cout << "Found " << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl;
return 1;
}
return 0;
}
int run_test_1(vx_device_h device, const char* program) {
int ret;
// upload program
std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program);
if (ret != 0) {
return ret;
}
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
if (ret != 0) {
return ret;
}
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
if (ret != 0) {
return ret;
}
return 0;
}
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
void cleanup() {
if (sbuf) {
vx_buf_release(sbuf);
}
if (dbuf) {
vx_buf_release(dbuf);
}
if (device) {
vx_dev_close(device);
}
}
int main(int argc, char *argv[]) {
int err;
int num_failures = 0;
int ret;
// parse command arguments
parse_args(argc, argv);
// open device connection
std::cout << "open device connection" << std::endl;
vx_device_h device;
err = vx_dev_open(&device);
if (err != 0)
return -1;
ret = vx_dev_open(&device);
if (ret != 0)
return ret;
// create source buffer
vx_buffer_h sbuf;
err = vx_alloc_shared_mem(device, 4096, &sbuf);
if (err != 0) {
vx_dev_close(device);
return -1;
std::cout << "create source buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
if (ret != 0) {
cleanup();
return ret;
}
// create destination buffer
vx_buffer_h dbuf;
err = vx_alloc_shared_mem(device, 4096, &dbuf);
if (err != 0) {
vx_buf_release(sbuf);
vx_dev_close(device);
return -1;
std::cout << "create destination buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
if (ret != 0) {
cleanup();
return ret;
}
// run tests
num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2);
num_failures += run_test(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4);
num_failures += run_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
// run tests
if (0 == test || -1 == test) {
std::cout << "run test suite 0" << std::endl;
// releae buffers
vx_buf_release(sbuf);
vx_buf_release(dbuf);
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
if (ret != 0) {
cleanup();
return ret;
}
// close device
vx_dev_close(device);
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2);
if (ret != 0) {
cleanup();
return ret;
}
if (0 == num_failures) {
printf("Test PASSED\n");
} else {
printf("Test FAILED\n");
ret = run_test_0(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4);
if (ret != 0) {
cleanup();
return ret;
}
ret = run_test_0(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
if (ret != 0) {
cleanup();
return ret;
}
}
return num_failures;
if (1 == test || -1 == test) {
std::cout << "run test suite 1" << std::endl;
ret = run_test_1(device, "rv32ui-p-lw.bin");
if (ret != 0) {
cleanup();
return ret;
}
}
if (2 == test || -1 == test) {
std::cout << "run test suite 1" << std::endl;
ret = run_test_1(device, "rv32ui-p-sw.bin");
if (ret != 0) {
cleanup();
return ret;
}
}
// cleanup
std::cout << "cleanup" << std::endl;
cleanup();
std::cout << "Test PASSED" << std::endl;
return 0;
}

Binary file not shown.

Binary file not shown.

View file

@ -40,13 +40,13 @@ kernel.elf: $(SRCS)
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/simx -lvortex -o $@
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
run-ase: $(PROJECT)
LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

Binary file not shown.

View file

@ -1,6 +1,5 @@
#include <iostream>
#include <unistd.h>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include "common.h"
@ -40,21 +39,77 @@ static void parse_args(int argc, char **argv) {
}
}
vx_device_h device;
vx_buffer_h buffer;
int run_test(vx_device_h device,
vx_buffer_h buffer,
const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t num_points) {
int ret;
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
if (ret != 0) {
return ret;
}
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
if (ret != 0) {
return ret;
}
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
if (ret != 0) {
return ret;
}
// download destination buffer
std::cout << "download destination buffer" << std::endl;
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
if (ret != 0) {
return ret;
}
// verify result
std::cout << "verify result" << std::endl;
{
int errors = 0;
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
int ref = i * i;
int cur = buf_ptr[i];
if (cur != ref) {
++errors;
}
}
if (errors != 0) {
std::cout << "Found " << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl;
return 1;
}
}
return 0;
}
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
void cleanup() {
if (device) {
vx_dev_close(device);
}
if (buffer) {
vx_buf_release(buffer);
}
if (device) {
vx_dev_close(device);
}
}
int main(int argc, char *argv[]) {
int ret;
int errors = 0;
size_t value;
kernel_arg_t kernel_arg;
@ -79,14 +134,14 @@ int main(int argc, char *argv[]) {
std::cout << "open device connection" << std::endl;
ret = vx_dev_open(&device);
if (ret != 0)
return -1;
return ret;
// upload program
std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program_file);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
// allocate device memory
@ -95,21 +150,21 @@ int main(int argc, char *argv[]) {
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
kernel_arg.src0_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
kernel_arg.src1_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
kernel_arg.dst_ptr = value;
@ -119,7 +174,7 @@ int main(int argc, char *argv[]) {
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
// populate source buffer values
@ -137,13 +192,13 @@ int main(int argc, char *argv[]) {
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
// upload kernel argument
@ -158,117 +213,29 @@ int main(int argc, char *argv[]) {
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
}
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
// run tests
std::cout << "run tests" << std::endl;
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
if (ret != 0) {
cleanup();
return -1;
return ret;
}
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
if (ret != 0) {
cleanup();
return -1;
}
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
if (ret != 0) {
cleanup();
return -1;
}
// download destination buffer
std::cout << "download destination buffer" << std::endl;
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return -1;
}
// verify result
std::cout << "verify result" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
int ref = i * i;
int cur = buf_ptr[i];
if (cur != ref) {
++errors;
}
}
}
if (errors != 0) {
printf("Found %d errors!\n", errors);
printf("FAILED!\n");
cleanup();
return -1;
}
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
if (ret != 0) {
cleanup();
return -1;
}
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
if (ret != 0) {
cleanup();
return -1;
}
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
if (ret != 0) {
cleanup();
return -1;
}
// download destination buffer
std::cout << "download destination buffer" << std::endl;
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return -1;
}
// verify result
std::cout << "verify result" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
int ref = i * i;
int cur = buf_ptr[i];
if (cur != ref) {
++errors;
}
}
return ret;
}
// cleanup
std::cout << "cleanup" << std::endl;
cleanup();
if (0 == errors) {
printf("PASSED!\n");
} else {
printf("Found %d errors!\n", errors);
printf("FAILED!\n");
}
std::cout << "PASSED!" << std::endl;
return errors;
return 0;
}

File diff suppressed because it is too large Load diff