This commit is contained in:
Blaise Tine 2019-11-25 11:39:19 -05:00
parent 6ecb4e5d28
commit ea53554215
31 changed files with 10878 additions and 1 deletions

View file

@ -33,7 +33,7 @@ QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LI
PROJECT = sgemm
SRCS = main.cc
SRCS = main.cc
all: $(PROJECT).dump $(PROJECT).hex

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
Inputs: 1138_bus.mtx vector.bin
This is a very small phantom data set.

View file

@ -0,0 +1,72 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH) -I.
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT = spmv
SRCS = main.cc parboil_opencl.c args.c gpu_info.c file.c convert_dataset.c mmio.c ocl.c
#stub.cc
all: $(PROJECT).dump $(PROJECT).hex
#parboil_opencl.o : parboil_opencl.c
# $(CC) $(CXXFLAGS) -c $^ -o $@
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug

View file

@ -0,0 +1,617 @@
#include <parboil.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*****************************************************************************/
/* Memory management routines */
/* Free an array of owned strings. */
void
pb_FreeStringArray(char **string_array)
{
char **p;
if (!string_array) return;
for (p = string_array; *p; p++) free(*p);
free(string_array);
}
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version)
{
if (name == NULL) {
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
exit(-1);
}
struct pb_PlatformParam *ret =
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
ret->name = name;
ret->version = version;
return ret;
}
void
pb_FreePlatformParam(struct pb_PlatformParam *p)
{
if (p == NULL) return;
free(p->name);
free(p->version);
free(p);
}
struct pb_DeviceParam *
pb_DeviceParam_index(int index)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_INDEX;
ret->index = index;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_cpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_CPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_gpu(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_GPU;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_ACCELERATOR;
return ret;
}
struct pb_DeviceParam *
pb_DeviceParam_name(char *name)
{
struct pb_DeviceParam *ret =
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
ret->criterion = pb_Device_NAME;
ret->name = name;
return ret;
}
void
pb_FreeDeviceParam(struct pb_DeviceParam *p)
{
if (p == NULL) return;
switch(p->criterion) {
case pb_Device_NAME:
free(p->name);
break;
case pb_Device_INDEX:
case pb_Device_CPU:
case pb_Device_ACCELERATOR:
break;
default:
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
exit(-1);
}
}
void
pb_FreeParameters(struct pb_Parameters *p)
{
free(p->outFile);
pb_FreeStringArray(p->inpFiles);
pb_FreePlatformParam(p->platform);
pb_FreeDeviceParam(p->device);
free(p);
}
/*****************************************************************************/
/* Parse a comma-delimited list of strings into an
* array of strings. */
static char **
read_string_array(char *in)
{
char **ret;
int i;
int count; /* Number of items in the input */
char *substring; /* Current substring within 'in' */
/* Count the number of items in the string */
count = 1;
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
/* Allocate storage */
ret = (char **)malloc((count + 1) * sizeof(char *));
/* Create copies of the strings from the list */
substring = in;
for (i = 0; i < count; i++) {
char *substring_end;
int substring_length;
/* Find length of substring */
for (substring_end = substring;
(*substring_end != ',') && (*substring_end != 0);
substring_end++);
substring_length = substring_end - substring;
/* Allocate memory and copy the substring */
ret[i] = (char *)malloc(substring_length + 1);
memcpy(ret[i], substring, substring_length);
ret[i][substring_length] = 0;
/* go to next substring */
substring = substring_end + 1;
}
ret[i] = NULL; /* Write the sentinel value */
return ret;
}
static void
report_parse_error(const char *str)
{
fputs(str, stderr);
}
/* Interpret a string as a 'pb_DeviceParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_DeviceParam *
read_device_param(char *str)
{
/* Try different ways of interpreting 'device_string' until one works */
/* If argument is an integer, then interpret it as a device index */
errno = 0;
char *end;
long device_int = strtol(str, &end, 10);
if (!errno) {
/* Negative numbers are not valid */
if (device_int < 0 || device_int > INT_MAX) return NULL;
return pb_DeviceParam_index(device_int);
}
/* Match against predefined strings */
if (strcmp(str, "CPU") == 0)
return pb_DeviceParam_cpu();
if (strcmp(str, "GPU") == 0)
return pb_DeviceParam_gpu();
if (strcmp(str, "ACCELERATOR") == 0)
return pb_DeviceParam_accelerator();
/* Assume any other string is a device name */
return pb_DeviceParam_name(strdup(str));
}
/* Interpret a string as a 'pb_PlatformParam' value.
* Return a pointer to a new value, or NULL on failure.
*/
static struct pb_PlatformParam *
read_platform_param(char *str)
{
int separator_index; /* Index of the '-' character separating
* name and version number. It's -1 if
* there's no '-' character. */
/* Find the last occurrence of '-' in 'str' */
{
char *cur;
separator_index = -1;
for (cur = str; *cur; cur++) {
if (*cur == '-') separator_index = cur - str;
}
}
/* The platform name is either the entire string, or all characters before
* the separator */
int name_length = separator_index == -1 ? strlen(str) : separator_index;
char *name_str = (char *)malloc(name_length + 1);
memcpy(name_str, str, name_length);
name_str[name_length] = 0;
/* The version is either NULL, or all characters after the separator */
char *version_str;
if (separator_index == -1) {
version_str = NULL;
}
else {
const char *version_input_str = str + separator_index + 1;
int version_length = strlen(version_input_str);
version_str = (char *)malloc(version_length + 1);
memcpy(version_str, version_input_str, version_length);
version_str[version_length] = 0;
}
/* Create output structure */
return pb_PlatformParam(name_str, version_str);
}
/****************************************************************************/
/* Argument parsing state */
/* Argument parsing state.
*
* Arguments that are interpreted by the argument parser are removed from
* the list. Variables 'argc' and 'argn' do not count arguments that have
* been removed.
*
* During argument parsing, the array of arguments is compacted, overwriting
* the erased arguments. Variable 'argv_put' points to the array element
* where the next argument will be written. Variable 'argv_get' points to
* the array element where the next argument will be read from.
*/
struct argparse {
int argc; /* Number of arguments. Mutable. */
int argn; /* Current argument index. */
char **argv_get; /* Argument value being read. */
char **argv_put; /* Argument value being written.
* argv_put <= argv_get. */
};
static void
initialize_argparse(struct argparse *ap, int argc, char **argv)
{
ap->argc = argc;
ap->argn = 0;
ap->argv_get = ap->argv_put = argv;
}
/* Finish argument parsing, without processing the remaining arguments.
* Write new argument count into _argc. */
static void
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
{
/* Move the remaining arguments */
for(; ap->argn < ap->argc; ap->argn++)
*ap->argv_put++ = *ap->argv_get++;
/* Update the argument count */
*_argc = ap->argc;
/* Insert a terminating NULL */
argv[ap->argc] = NULL;
}
/* Delete the current argument. The argument will not be visible
* when argument parsing is done. */
static void
delete_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "delete_argument\n");
}
ap->argc--;
ap->argv_get++;
}
/* Go to the next argument. Also, move the current argument to its
* final location in argv. */
static void
next_argument(struct argparse *ap)
{
if (ap->argn >= ap->argc) {
fprintf(stderr, "next_argument\n");
}
/* Move argument to its new location. */
*ap->argv_put++ = *ap->argv_get++;
ap->argn++;
}
static int
is_end_of_arguments(struct argparse *ap)
{
return ap->argn == ap->argc;
}
/* Get the current argument */
static char *
get_argument(struct argparse *ap)
{
return *ap->argv_get;
}
/* Get the current argument, and also delete it */
static char *
consume_argument(struct argparse *ap)
{
char *ret = get_argument(ap);
delete_argument(ap);
return ret;
}
/****************************************************************************/
/* The result of parsing a command-line argument */
typedef enum {
ARGPARSE_OK, /* Success */
ARGPARSE_ERROR, /* Error */
ARGPARSE_DONE /* Success, and do not continue parsing */
} result;
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
/* A command-line option */
struct option {
char short_name; /* If not 0, the one-character
* name of this option */
const char *long_name; /* If not NULL, the long name of this option */
parse_action *action; /* What to do when this option occurs.
* Sentinel value is NULL.
*/
};
/* Output file
*
* -o FILE
*/
static result
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-o'\n");
return ARGPARSE_ERROR;
}
/* Replace the output file name */
free(params->outFile);
params->outFile = strdup(consume_argument(ap));
return ARGPARSE_OK;
}
/* Input files
*
* -i FILE,FILE,...
*/
static result
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
{
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting file name after '-i'\n");
return ARGPARSE_ERROR;
}
/* Replace the input file list */
pb_FreeStringArray(params->inpFiles);
params->inpFiles = read_string_array(consume_argument(ap));
return ARGPARSE_OK;
}
/* End of options
*
* --
*/
static result
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
{
return ARGPARSE_DONE;
}
/* OpenCL device
*
* --device X
*/
static result
parse_device(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a device */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--device'\n");
return ARGPARSE_ERROR;
}
char *device_string = consume_argument(ap);
struct pb_DeviceParam *device_param = read_device_param(device_string);
if (!device_param) {
report_parse_error("Unrecognized device specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreeDeviceParam(params->device);
params->device = device_param;
return ARGPARSE_OK;
}
static result
parse_platform(struct argparse *ap, struct pb_Parameters *params)
{
/* Read the next argument, which specifies a platform */
if (is_end_of_arguments(ap))
{
report_parse_error("Expecting device specification after '--platform'\n");
return ARGPARSE_ERROR;
}
char *platform_string = consume_argument(ap);
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
if (!platform_param) {
report_parse_error("Unrecognized platform specification format on command line\n");
return ARGPARSE_ERROR;
}
/* Save the result */
pb_FreePlatformParam(params->platform);
params->platform = platform_param;
return ARGPARSE_OK;
}
static struct option options[] = {
{ 'o', NULL, &parse_output_file },
{ 'i', NULL, &parse_input_files },
{ '-', NULL, &parse_end_options },
{ 0, "device", &parse_device },
{ 0, "platform", &parse_platform },
{ 0, NULL, NULL }
};
static int
is_last_option(struct option *op)
{
return op->action == NULL;
}
/****************************************************************************/
/* Parse command-line parameters.
* Return zero on error, nonzero otherwise.
* On error, the other outputs may be invalid.
*
* The information collected from parameters is used to update
* 'ret'. 'ret' should be initialized.
*
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
*/
static int
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
{
char *err_message;
struct argparse ap;
/* Each argument */
initialize_argparse(&ap, *_argc, argv);
while(!is_end_of_arguments(&ap)) {
result arg_result; /* Result of parsing this option */
char *arg = get_argument(&ap);
/* Process this argument */
if (arg[0] == '-') {
/* Single-character flag */
if ((arg[1] != 0) && (arg[2] == 0)) {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching short option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->short_name == arg[1]) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
/* Long flag */
if (arg[1] == '-') {
delete_argument(&ap); /* This argument is consumed here */
/* Find a matching long option */
struct option *op;
for (op = options; !is_last_option(op); op++) {
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
arg_result = (*op->action)(&ap, ret);
goto option_was_processed;
}
}
/* No option matches */
report_parse_error("Unexpected command-line parameter\n");
arg_result = ARGPARSE_ERROR;
goto option_was_processed;
}
}
else {
/* Other arguments are ignored */
next_argument(&ap);
arg_result = ARGPARSE_OK;
goto option_was_processed;
}
option_was_processed:
/* Decide what to do next based on 'arg_result' */
switch(arg_result) {
case ARGPARSE_OK:
/* Continue processing */
break;
case ARGPARSE_ERROR:
/* Error exit from the function */
return 0;
case ARGPARSE_DONE:
/* Normal exit from the argument parsing loop */
goto end_of_options;
}
} /* end for each argument */
/* If all arguments were processed, then normal exit from the loop */
end_of_options:
finalize_argparse(&ap, _argc, argv);
return 1;
}
/*****************************************************************************/
/* Other exported functions */
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv)
{
struct pb_Parameters *ret =
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
/* Initialize the parameters structure */
ret->outFile = NULL;
ret->inpFiles = (char **)malloc(sizeof(char *));
ret->inpFiles[0] = NULL;
ret->platform = NULL;
ret->device = NULL;
/* Read parameters and update _argc, argv */
if (!pb_ParseParameters(ret, _argc, argv)) {
/* Parse error */
pb_FreeParameters(ret);
return NULL;
}
return ret;
}
int
pb_Parameters_CountInputs(struct pb_Parameters *p)
{
int n;
for (n = 0; p->inpFiles[n]; n++);
return n;
}

View file

@ -0,0 +1,356 @@
/*
* NOTES:
*
* 1) Matrix Market files are always 1-based, i.e. the index of the first
* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE
* OFFSETS ACCORDINGLY when reading and writing
* to files.
*
* 2) ANSI C requires one to use the "l" format modifier when reading
* double precision floating point numbers in scanf() and
* its variants. For example, use "%lf", "%lg", or "%le"
* when reading doubles, otherwise errors will occur.
*/
#include "convert_dataset.h"
#include "mmio.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
typedef struct _mat_entry {
int row, col; /* i,j */
float val;
} mat_entry;
typedef struct _row_stats { // stats on each row
int index;
int size;
int start;
int padding;
} row_stats;
int sort_rows(const void *a, const void *b) {
return (((mat_entry *)a)->row - ((mat_entry *)b)->row);
}
int sort_cols(const void *a, const void *b) {
return (((mat_entry *)a)->col - ((mat_entry *)b)->col);
}
/* sorts largest by size first */
int sort_stats(const void *a, const void *b) {
return (((row_stats *)b)->size - ((row_stats *)a)->size);
}
/*
* COO to JDS matrix conversion.
*
* Needs to output both column and row major JDS formats
* with the minor unit padded to a multiple of `pad_minor`
* and the major unit arranged into groups of `group_size`
*
* Major unit is col, minor is row. Each block is either a scalar or vec4
*
* Inputs:
* mtx_filename - the file in COO format
* pad_rows - multiple of packed groups to pad each row to
* warp_size - each group of `warp_size` cols is padded to the same amount
* pack_size - number of items to pack
* mirrored - is the input mtx file a symmetric matrix? The other half will be
* filled in if this is =1
* binary - does the sparse matrix file have values in the format "%d %d"
* or "%d %d %lg"?
* debug_level - 0 for no output, 1 for simple JDS data, 2 for visual grid
* Outputs:
* data - the raw data, padded and grouped as requested
* data_row_ptr - pointer offset into the `data` output, referenced
* by the current row loop index
* nz_count - number of non-zero entries in each row
* indexed by col / warp_size
* data_col_index - corresponds to the col that the same
* array index in `data` is at
* data_row_map - JDS row to real row
* data_cols - number of columns the output JDS matrix has
* dim - dimensions of the input matrix
* data_ptr_len - size of data_row_ptr (maps to original `depth` var)
*/
int coo_to_jds(char *mtx_filename, int pad_rows, int warp_size, int pack_size,
int mirrored, int binary, int debug_level, float **data,
int **data_row_ptr, int **nz_count, int **data_col_index,
int **data_row_map, int *data_cols, int *dim, int *len,
int *nz_count_len, int *data_ptr_len) {
int ret_code;
MM_typecode matcode;
FILE *f;
int nz;
int i;
float *val;
mat_entry *entries;
row_stats *stats;
int rows, cols;
if ((f = fopen(mtx_filename, "r")) == NULL)
exit(1);
printf("OK**\n");
if (mm_read_banner(f, &matcode) != 0) {
printf("Could not process Matrix Market banner.\n");
exit(1);
}
printf("OK**\n");
/* This is how one can screen matrix types if their application */
/* only supports a subset of the Matrix Market data types. */
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode)) {
printf("Sorry, this application does not support ");
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
exit(1);
}
/* find out size of sparse matrix .... */
if ((ret_code = mm_read_mtx_crd_size(f, &rows, &cols, &nz)) != 0)
exit(1);
*dim = rows;
if (mirrored) {
// max possible size, might be less because diagonal values aren't doubled
entries = (mat_entry *)malloc(2 * nz * sizeof(mat_entry));
} else {
entries = (mat_entry *)malloc(nz * sizeof(mat_entry));
}
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
int cur_i = 0; // to account for mirrored diagonal entries
for (i = 0; i < nz; i++, cur_i++) {
if (!binary) {
fscanf(f, "%d %d %f\n", &entries[cur_i].row, &entries[cur_i].col,
&entries[cur_i].val);
} else {
fscanf(f, "%d %d\n", &entries[cur_i].row, &entries[cur_i].col);
entries[cur_i].val = 1.0;
}
entries[cur_i].row--;
entries[cur_i].col--;
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
// entries[cur_i].val);
if (mirrored) {
// fill in mirrored diagonal
if (entries[cur_i].row != entries[cur_i].col) { // not a diagonal value
cur_i++;
entries[cur_i].val = entries[cur_i - 1].val;
entries[cur_i].col = entries[cur_i - 1].row;
entries[cur_i].row = entries[cur_i - 1].col;
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
// entries[cur_i].val);
}
}
}
// set new non-zero count
nz = cur_i;
if (debug_level >= 1) {
printf("Converting COO to JDS format (%dx%d)\n%d matrix entries, warp size "
"= %d, "
"row padding align = %d, pack size = %d\n\n",
rows, cols, nz, warp_size, pad_rows, pack_size);
}
if (f != stdin)
fclose(f);
/*
* Now we have an array of values in entries
* Transform to padded JDS format - sort by rows, then fubini
*/
int irow, icol = 0, istart = 0;
int total_size = 0;
/* Loop through each entry to figure out padding, grouping that determine
* final data array size
*
* First calculate stats for each row
*
* Collect stats using the major_stats typedef
*/
qsort(entries, nz, sizeof(mat_entry), sort_rows); // sort by row number
rows = entries[nz - 1].row + 1; // last item is greatest row (zero indexed)
if (rows % warp_size) { // pad group number to warp_size here
rows += warp_size - rows % warp_size;
}
stats = (row_stats *)calloc(rows, sizeof(row_stats)); // set to 0
*data_row_map = (int *)calloc(rows, sizeof(int));
irow = entries[0].row; // set first row
// printf("First row %d\n", irow);
for (i = 0; i < nz; i++) { // loop through each sorted entry
if (entries[i].row != irow || i == nz - 1) { // new row
// printf("%d != %d\n", entries[i].row, irow);
if (i == nz - 1) {
// last item, add it to current row
// printf("Last item i=%d, row=%d, irow=%d\n", i, entries[i].row, irow);
icol++;
}
// hit a new row, record stats for the last row (i-1)
stats[irow].size = icol; // record # cols in previous row
stats[irow].index = entries[i - 1].row; // row # for previous stat item
// printf("Row %d, i=%d, irow=%d\n", entries[i].row, i, irow);
stats[irow].start = istart; // starting location in entries array
// set stats for the next row until this break again
icol = 0; // reset row items
irow = entries[i].row;
istart = i;
}
icol++; // keep track of number of items in this row
}
*nz_count_len = rows / warp_size + rows % warp_size;
*nz_count =
(int *)malloc(*nz_count_len * sizeof(int)); // only one value per group
/* sort based upon row size, greatest first */
qsort(stats, rows, sizeof(row_stats), sort_stats);
/* figure out padding and grouping */
if (debug_level >= 1) {
printf("Padding data....%d rows, %d groups\n", rows, *nz_count_len);
}
int pad_to, total_padding = 0, pack_to;
pad_rows *= pack_size; // change padding to account for packed items
for (i = 0; i < rows; i++) {
// record JDS to real row number
(*data_row_map)[i] = stats[i].index;
if (i < rows - 1) {
// (*data_row_map)[i]--; // ???? no idea why this is off by 1
}
// each row is padded so the number of packed groups % pad_rows == 0
if (i % warp_size ==
0) { // on a group boundary with the largest number of items
// find padding in individual items
if (stats[i].size % pad_rows) {
stats[i].padding =
pad_rows - (stats[i].size % pad_rows); // find padding
} else {
stats[i].padding = 0; // no padding necessary, already at pad multiple
}
if (stats[i].size % pack_size) {
pack_to = ceil((float)stats[i].size / pack_size);
} else {
pack_to = stats[i].size / pack_size;
}
// pack_to = stats[i].size + (!stats[i].size%pack_size) ? 0 : (pack_size -
// stats[i].size%pack_size);
pad_to = stats[i].size +
stats[i].padding; // total size of this row, with padding
// TODO: change this to reflect the real number of nonzero packed items,
// not the padded
// value
(*nz_count)[i / warp_size] =
pack_to; // number of packed items in this group
total_size += pad_to * warp_size; // allocate size for this padded group
if (debug_level >= 2)
printf("Padding warp group %d to %d items, zn = %d\n", i / warp_size,
pad_to, pack_to);
} else {
stats[i].padding = pad_to - stats[i].size;
}
total_padding += stats[i].padding;
// if (debug_level >= 2)
// printf("Row %d, %d items, %d padding\n", stats[i].index,
// stats[i].size, stats[i].padding);
}
/* allocate data and data_row_index */
if (debug_level >= 1)
printf("Allocating data space: %d entries (%f%% padding)\n", total_size,
(float)100 * total_padding / total_size);
*data = (float *)calloc(total_size,
sizeof(float)); // set to 0 so padded values are set
*data_col_index =
(int *)calloc(total_size, sizeof(int)); // any unset indexes point to 0
*data_row_ptr = (int *)calloc(rows, sizeof(int));
*len = total_size;
i = 0; // data index, including padding
/*
* Keep looping through each row, writing data a group at a time
* to the output array. Increment `irow` each time, and use it as
* an index into entries along with stats.start to get the next
* data item
*/
irow = 0; // keep track of which row we are in inside the fubini-ed array
int idata = 0; // position within final data array
int entry_index, j;
int ipack; // used in internal loop for writing packed values
mat_entry entry;
while (1) {
/* record data_row_ptr */
(*data_row_ptr)[irow] = idata;
/* End condtion: the size of the greatest row is smaller than the current
Fubini-ed row */
if (stats[0].size + stats[0].padding <= irow * pack_size)
break;
// printf("Data row pointer for row %d is %d\n", irow, idata);
for (i = 0; i < rows; i++) {
/* take one packed group from each original row */
// printf("Output irow %d icol %d (real %d,%d size %d)\n", irow, i,
// entry.col, i, stats[i].size);
/* Watch out for little vs big endian, and how opencl interprets vector
* casting from pointers */
for (ipack = 0; ipack < pack_size; ipack++) {
if (stats[i].size > irow * pack_size + ipack) {
// copy value
entry_index = stats[i].start + irow * pack_size + ipack;
entry = entries[entry_index];
/* record index and value */
(*data)[idata] = entry.val;
/* each data item will get its row index from the thread, col from
* here */
(*data_col_index)[idata] = entry.col;
if (debug_level >= 2) {
if (i < 3) {
// first row debugging
printf("[%d row%d=%.3f]", ipack + 1, i, entry.val);
} else {
printf("%d", ipack + 1);
}
}
} else if (stats[i].size + stats[i].padding >
irow * pack_size + ipack) {
/* add padding to the end of each row here - this assumes padding is
* factored into allocated size */
if (debug_level >= 2)
printf("0");
(*data_col_index)[idata] = -1;
} else {
goto endwrite; // no data written this pass, so don't increment idata
}
idata += 1;
}
}
endwrite:
if (debug_level >= 2) {
printf("\n");
}
irow += 1;
}
if (debug_level >= 1)
printf("Finished converting.\nJDS format has %d columns, %d rows.\n", rows,
irow);
free(entries);
free(stats);
printf("nz_count_len = %d\n", *nz_count_len);
*data_cols = rows;
*data_ptr_len = irow + 1;
return 0;
}

View file

@ -0,0 +1,17 @@
#ifndef _CONVERT_DATASET_H
#define _CONVERT_DATASET_H
#ifdef __cplusplus
extern "C" {
#endif
int coo_to_jds(char* mtx_filename, int pad_rows, int warp_size, int pack_size,
int mirrored, int binary, int debug_level,
float** data, int** data_row_ptr, int** nz_count, int** data_col_index,
int** data_row_map, int* data_cols, int* dim, int* len, int* nz_count_len,
int* data_ptr_len);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,78 @@
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#if __BYTE_ORDER != __LITTLE_ENDIAN
# error "File I/O is not implemented for this system: wrong endianness."
#endif
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt)
{
FILE* fid = fopen(fName, "rb");
if (fid == NULL)
{
fprintf(stderr, "Cannot open input file\n");
exit(-1);
}
fscanf(fid, "%d %d %d %d %d\n",len,depth,nzcnt_len,dim,pad);
int _len=len[0];
int _depth=depth[0];
int _dim=dim[0];
int _pad=pad[0];
int _nzcnt_len=nzcnt_len[0];
*h_data = (float *) malloc(_len * sizeof (float));
fread (*h_data, sizeof (float), _len, fid);
*h_indices = (int *) malloc(_len * sizeof (int));
fread (*h_indices, sizeof (int), _len, fid);
*h_ptr = (int *) malloc(_depth * sizeof (int));
fread (*h_ptr, sizeof (int), _depth, fid);
*h_perm = (int *) malloc(_dim * sizeof (int));
fread (*h_perm, sizeof (int), _dim, fid);
*h_nzcnt = (int *) malloc(_nzcnt_len * sizeof (int));
fread (*h_nzcnt, sizeof (int), _nzcnt_len, fid);
fclose (fid);
}
void input_vec(char *fName,float *h_vec,int dim)
{
FILE* fid = fopen(fName, "rb");
fread (h_vec, sizeof (float), dim, fid);
fclose(fid);
}
void outputData(char* fName, float *h_Ax_vector,int dim)
{
FILE* fid = fopen(fName, "w");
uint32_t tmp32;
if (fid == NULL)
{
fprintf(stderr, "Cannot open output file\n");
exit(-1);
}
tmp32 = dim;
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
fwrite(h_Ax_vector, sizeof(float), dim, fid);
fclose (fid);
}

View file

@ -0,0 +1,18 @@
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __FILEH__
#define __FILEH__
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
float** h_data, int** h_indices, int** h_ptr,
int** h_perm, int** h_nzcnt);
void input_vec(char* fNanme, float *h_vec,int dim);
void outputData(char* fName, float *h_Ax_vector,int dim);
#endif

View file

@ -0,0 +1,55 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
//#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#include "gpu_info.h"
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm)
{
int max_thread;
int max_block=8;
if(major==1)
{
if(minor>=2)
max_thread=1024;
else
max_thread=768;
}
else if(major==2)
max_thread=1536;
else
//newer GPU //keep using 2.0
max_thread=1536;
int _grid;
int _thread;
if(task*pad>sm*max_thread)
{
_thread=max_thread/max_block;
_grid = ((task*pad+_thread-1)/_thread)*_thread;
}
else
{
_thread=pad;
_grid=task*pad;
}
thread[0]=_thread;
grid[0]=_grid;
}

View file

@ -0,0 +1,20 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef __GPUINFOH__
#define __GPUINFOH__
void compute_active_thread(size_t *thread,
size_t *grid,
int task,
int pad,
int major,
int minor,
int sm);
#endif

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,3 @@
Inputs: 1138_bus.mtx vector.bin
This is a very small phantom data set.

Binary file not shown.

View file

@ -0,0 +1,36 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
__kernel void spmv_jds_naive(__global float *dst_vector, __global float *d_data,
__global int *d_index, __global int *d_perm,
__global float *x_vec, const int dim,
__constant int *jds_ptr_int,
__constant int *sh_zcnt_int)
{
int ix = get_global_id(0);
if (ix < dim) {
float sum = 0.0f;
// 32 is warp size
int bound=sh_zcnt_int[ix/32];
for(int k=0;k<bound;k++)
{
int j = jds_ptr_int[k] + ix;
int in = d_index[j];
float d = d_data[j];
float t = x_vec[in];
sum += d*t;
}
dst_vector[d_perm[ix]] = sum;
}
}

Binary file not shown.

View file

@ -0,0 +1,301 @@
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <parboil.h>
#include <stdio.h>
#include <stdlib.h>
#include "convert_dataset.h"
#include "file.h"
#include "gpu_info.h"
#include "ocl.h"
static int generate_vector(float *x_vector, int dim) {
srand(54321);
int i;
for (i = 0; i < dim; i++) {
x_vector[i] = (rand() / (float)RAND_MAX);
}
return 0;
}
int main(int argc, char **argv) {
struct pb_TimerSet timers;
struct pb_Parameters *parameters;
printf("CUDA accelerated sparse matrix vector multiplication****\n");
printf("Original version by Li-Wen Chang <lchang20@illinois.edu> and "
"Shengzhao Wu<wu14@illinois.edu>\n");
printf("This version maintained by Chris Rodrigues ***********\n");
// parameters = pb_ReadParameters(&argc, argv);
parameters->inpFiles = (char **)malloc(sizeof(char *) * 3);
parameters->inpFiles[0] = (char *)malloc(100);
parameters->inpFiles[1] = (char *)malloc(100);
parameters->inpFiles[2] = NULL;
strncpy(parameters->inpFiles[0], "1138_bus.mtx", 100);
strncpy(parameters->inpFiles[1], "vector.bin", 100);
printf("OK\n");
if ((parameters->inpFiles[0] == NULL) || (parameters->inpFiles[1] == NULL)) {
fprintf(stderr, "Expecting one input filename\n");
exit(-1);
}
pb_InitializeTimerSet(&timers);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
// parameters declaration
cl_int clStatus;
pb_Context *pb_context;
pb_context = pb_InitOpenCLContext(parameters);
if (pb_context == NULL) {
fprintf(stderr, "Error: No OpenCL platform/device can be found.");
return -1;
}
printf("OK\n");
cl_device_id clDevice = (cl_device_id)pb_context->clDeviceId;
cl_platform_id clPlatform = (cl_platform_id)pb_context->clPlatformId;
cl_context clContext = (cl_context)pb_context->clContext;
cl_command_queue clCommandQueue = clCreateCommandQueue(
clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &clStatus);
CHECK_ERROR("clCreateCommandQueue")
printf("OK\n");
pb_SetOpenCL(&clContext, &clCommandQueue);
//const char *clSource[] = {readFile("src/opencl_base/kernel.cl")};
// cl_program clProgram =
// clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
cl_program clProgram = clCreateProgramWithBuiltInKernels(
clContext, 1, &clDevice, "spmv_jds_naive", &clStatus);
CHECK_ERROR("clCreateProgramWithSource")
printf("OK\n");
char clOptions[50];
sprintf(clOptions, "");
clStatus = clBuildProgram(clProgram, 1, &clDevice, clOptions, NULL, NULL);
CHECK_ERROR("clBuildProgram")
cl_kernel clKernel = clCreateKernel(clProgram, "spmv_jds_naive", &clStatus);
CHECK_ERROR("clCreateKernel")
printf("OK\n");
int len;
int depth;
int dim;
int pad = 32;
int nzcnt_len;
// host memory allocation
// matrix
float *h_data;
int *h_indices;
int *h_ptr;
int *h_perm;
int *h_nzcnt;
// vector
float *h_Ax_vector;
float *h_x_vector;
// device memory allocation
// matrix
cl_mem d_data;
cl_mem d_indices;
cl_mem d_ptr;
cl_mem d_perm;
cl_mem d_nzcnt;
// vector
cl_mem d_Ax_vector;
cl_mem d_x_vector;
cl_mem jds_ptr_int;
cl_mem sh_zcnt_int;
// load matrix from files
pb_SwitchToTimer(&timers, pb_TimerID_IO);
// inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad,
// &h_data, &h_indices, &h_ptr,
// &h_perm, &h_nzcnt);
int col_count;
printf("OK--\n");
coo_to_jds(parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx
1, // row padding
pad, // warp size
1, // pack size
1, // is mirrored?
0, // binary matrix
1, // debug level [0:2]
&h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm, &col_count, &dim,
&len, &nzcnt_len, &depth);
printf("OK++\n");
// pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
h_Ax_vector = (float *)malloc(sizeof(float) * dim);
h_x_vector = (float *)malloc(sizeof(float) * dim);
input_vec(parameters->inpFiles[1], h_x_vector, dim);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
OpenCLDeviceProp clDeviceProp;
// clStatus =
//clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,sizeof(cl_uint),&(clDeviceProp.major),NULL);
// CHECK_ERROR("clGetDeviceInfo")
// clStatus =
//clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,sizeof(cl_uint),&(clDeviceProp.minor),NULL);
// CHECK_ERROR("clGetDeviceInfo")
clStatus =
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint),
&(clDeviceProp.multiProcessorCount), NULL);
CHECK_ERROR("clGetDeviceInfo")
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
// memory allocation
d_data = clCreateBuffer(clContext, CL_MEM_READ_ONLY, len * sizeof(float),
NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
d_indices = clCreateBuffer(clContext, CL_MEM_READ_ONLY, len * sizeof(int),
NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
d_perm = clCreateBuffer(clContext, CL_MEM_READ_ONLY, dim * sizeof(int), NULL,
&clStatus);
CHECK_ERROR("clCreateBuffer")
d_x_vector = clCreateBuffer(clContext, CL_MEM_READ_ONLY, dim * sizeof(float),
NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
d_Ax_vector = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY,
dim * sizeof(float), NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
jds_ptr_int = clCreateBuffer(clContext, CL_MEM_READ_ONLY, 5000 * sizeof(int),
NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
sh_zcnt_int = clCreateBuffer(clContext, CL_MEM_READ_ONLY, 5000 * sizeof(int),
NULL, &clStatus);
CHECK_ERROR("clCreateBuffer")
clMemSet(clCommandQueue, d_Ax_vector, 0, dim * sizeof(float));
// memory copy
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_data, CL_FALSE, 0,
len * sizeof(float), h_data, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_indices, CL_FALSE, 0,
len * sizeof(int), h_indices, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_perm, CL_FALSE, 0,
dim * sizeof(int), h_perm, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_x_vector, CL_FALSE, 0,
dim * sizeof(int), h_x_vector, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus = clEnqueueWriteBuffer(clCommandQueue, jds_ptr_int, CL_FALSE, 0,
depth * sizeof(int), h_ptr, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
clStatus =
clEnqueueWriteBuffer(clCommandQueue, sh_zcnt_int, CL_TRUE, 0,
nzcnt_len * sizeof(int), h_nzcnt, 0, NULL, NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
size_t grid;
size_t block;
compute_active_thread(&block, &grid, nzcnt_len, pad, clDeviceProp.major,
clDeviceProp.minor, clDeviceProp.multiProcessorCount);
// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!grid is %d and block is
// %d=\n",grid,block);
// printf("!!! dim is %d\n",dim);
clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), &d_Ax_vector);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), &d_data);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), &d_indices);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), &d_perm);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), &d_x_vector);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 5, sizeof(int), &dim);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 6, sizeof(cl_mem), &jds_ptr_int);
CHECK_ERROR("clSetKernelArg")
clStatus = clSetKernelArg(clKernel, 7, sizeof(cl_mem), &sh_zcnt_int);
CHECK_ERROR("clSetKernelArg")
// main execution
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
int i;
for (i = 0; i < 50; i++) {
clStatus = clEnqueueNDRangeKernel(clCommandQueue, clKernel, 1, NULL, &grid,
&block, 0, NULL, NULL);
CHECK_ERROR("clEnqueueNDRangeKernel")
}
clStatus = clFinish(clCommandQueue);
CHECK_ERROR("clFinish")
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
// HtoD memory copy
clStatus =
clEnqueueReadBuffer(clCommandQueue, d_Ax_vector, CL_TRUE, 0,
dim * sizeof(float), h_Ax_vector, 0, NULL, NULL);
CHECK_ERROR("clEnqueueReadBuffer")
clStatus = clReleaseKernel(clKernel);
clStatus = clReleaseProgram(clProgram);
clStatus = clReleaseMemObject(d_data);
clStatus = clReleaseMemObject(d_indices);
clStatus = clReleaseMemObject(d_perm);
clStatus = clReleaseMemObject(d_x_vector);
clStatus = clReleaseMemObject(d_Ax_vector);
CHECK_ERROR("clReleaseMemObject")
clStatus = clReleaseCommandQueue(clCommandQueue);
clStatus = clReleaseContext(clContext);
if (parameters->outFile) {
pb_SwitchToTimer(&timers, pb_TimerID_IO);
outputData(parameters->outFile, h_Ax_vector, dim);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
//free((void *)clSource[0]);
free(h_data);
free(h_indices);
free(h_ptr);
free(h_perm);
free(h_nzcnt);
free(h_Ax_vector);
free(h_x_vector);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(parameters);
return 0;
}

View file

@ -0,0 +1,509 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "mmio.h"
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_)
{
FILE *f;
MM_typecode matcode;
int M, N, nz;
int i;
double *val;
int *I, *J;
if ((f = fopen(fname, "r")) == NULL)
return -1;
if (mm_read_banner(f, &matcode) != 0)
{
printf("mm_read_unsymetric: Could not process Matrix Market banner ");
printf(" in file [%s]\n", fname);
return -1;
}
if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode)))
{
fprintf(stderr, "Sorry, this application does not support ");
fprintf(stderr, "Market Market type: [%s]\n",
mm_typecode_to_str(matcode));
return -1;
}
/* find out size of sparse matrix: M, N, nz .... */
if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
{
fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
return -1;
}
*M_ = M;
*N_ = N;
*nz_ = nz;
/* reseve memory for matrices */
I = (int *) malloc(nz * sizeof(int));
J = (int *) malloc(nz * sizeof(int));
val = (double *) malloc(nz * sizeof(double));
*val_ = val;
*I_ = I;
*J_ = J;
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
for (i=0; i<nz; i++)
{
fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
I[i]--; /* adjust from 1-based to 0-based */
J[i]--;
}
fclose(f);
return 0;
}
int mm_is_valid(MM_typecode matcode)
{
if (!mm_is_matrix(matcode)) return 0;
if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
mm_is_skew(matcode))) return 0;
return 1;
}
int mm_read_banner(FILE *f, MM_typecode *matcode)
{
char line[MM_MAX_LINE_LENGTH];
char banner[MM_MAX_TOKEN_LENGTH];
char mtx[MM_MAX_TOKEN_LENGTH];
char crd[MM_MAX_TOKEN_LENGTH];
char data_type[MM_MAX_TOKEN_LENGTH];
char storage_scheme[MM_MAX_TOKEN_LENGTH];
char *p;
mm_clear_typecode(matcode);
if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
return MM_PREMATURE_EOF;
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
storage_scheme) != 5)
return MM_PREMATURE_EOF;
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
/* check for banner */
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
return MM_NO_HEADER;
/* first field should be "mtx" */
if (strcmp(mtx, MM_MTX_STR) != 0)
return MM_UNSUPPORTED_TYPE;
mm_set_matrix(matcode);
/* second field describes whether this is a sparse matrix (in coordinate
storgae) or a dense array */
if (strcmp(crd, MM_SPARSE_STR) == 0)
mm_set_sparse(matcode);
else
if (strcmp(crd, MM_DENSE_STR) == 0)
mm_set_dense(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* third field */
if (strcmp(data_type, MM_REAL_STR) == 0)
mm_set_real(matcode);
else
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
mm_set_complex(matcode);
else
if (strcmp(data_type, MM_PATTERN_STR) == 0)
mm_set_pattern(matcode);
else
if (strcmp(data_type, MM_INT_STR) == 0)
mm_set_integer(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* fourth field */
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
mm_set_general(matcode);
else
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
mm_set_symmetric(matcode);
else
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
mm_set_hermitian(matcode);
else
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
mm_set_skew(matcode);
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
{
if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = *nz = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
return 0;
else
do
{
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 3);
return 0;
}
int mm_read_mtx_array_size(FILE *f, int *M, int *N)
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d", M, N) == 2)
return 0;
else /* we have a blank line */
do
{
num_items_read = fscanf(f, "%d %d", M, N);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 2);
return 0;
}
int mm_write_mtx_array_size(FILE *f, int M, int N)
{
if (fprintf(f, "%d %d\n", M, N) != 2)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
/*-------------------------------------------------------------------------*/
/******************************************************************/
/* use when I[], J[], and val[]J, and val[] are already allocated */
/******************************************************************/
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
int i;
if (mm_is_complex(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode))
{
for (i=0; i<nz; i++)
{
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
!= 3) return MM_PREMATURE_EOF;
}
}
else if (mm_is_pattern(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d", &I[i], &J[i])
!= 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
double *real, double *imag, MM_typecode matcode)
{
if (mm_is_complex(matcode))
{
if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode))
{
if (fscanf(f, "%d %d %lg\n", I, J, real)
!= 3) return MM_PREMATURE_EOF;
}
else if (mm_is_pattern(matcode))
{
if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
/************************************************************************
mm_read_mtx_crd() fills M, N, nz, array of values, and return
type code, e.g. 'MCRS'
if matrix is complex, values[] is of size 2*nz,
(nz pairs of real/imaginary values)
************************************************************************/
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
double **val, MM_typecode *matcode)
{
int ret_code;
FILE *f;
if (strcmp(fname, "stdin") == 0) f=stdin;
else
if ((f = fopen(fname, "r")) == NULL)
return MM_COULD_NOT_READ_FILE;
if ((ret_code = mm_read_banner(f, matcode)) != 0)
return ret_code;
if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
mm_is_matrix(*matcode)))
return MM_UNSUPPORTED_TYPE;
if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
return ret_code;
*I = (int *) malloc(*nz * sizeof(int));
*J = (int *) malloc(*nz * sizeof(int));
*val = NULL;
if (mm_is_complex(*matcode))
{
*val = (double *) malloc(*nz * 2 * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_real(*matcode))
{
*val = (double *) malloc(*nz * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_pattern(*matcode))
{
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
if (f != stdin) fclose(f);
return 0;
}
int mm_write_banner(FILE *f, MM_typecode matcode)
{
char *str = mm_typecode_to_str(matcode);
int ret_code;
ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
free(str);
if (ret_code !=2 )
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
FILE *f;
int i;
if (strcmp(fname, "stdout") == 0)
f = stdout;
else
if ((f = fopen(fname, "w")) == NULL)
return MM_COULD_NOT_WRITE_FILE;
/* print banner followed by typecode */
fprintf(f, "%s ", MatrixMarketBanner);
fprintf(f, "%s\n", mm_typecode_to_str(matcode));
/* print matrix sizes and nonzeros */
fprintf(f, "%d %d %d\n", M, N, nz);
/* print values */
if (mm_is_pattern(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d\n", I[i], J[i]);
else
if (mm_is_real(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
else
if (mm_is_complex(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
val[2*i+1]);
else
{
if (f != stdout) fclose(f);
return MM_UNSUPPORTED_TYPE;
}
if (f !=stdout) fclose(f);
return 0;
}
/**
* Create a new copy of a string s. mm_strdup() is a common routine, but
* not part of ANSI C, so it is included here. Used by mm_typecode_to_str().
*
*/
char *mm_strdup(const char *s)
{
int len = strlen(s);
char *s2 = (char *) malloc((len+1)*sizeof(char));
return strcpy(s2, s);
}
char *mm_typecode_to_str(MM_typecode matcode)
{
char buffer[MM_MAX_LINE_LENGTH];
char *types[4];
char *mm_strdup(const char *);
int error =0;
/* check for MTX type */
if (mm_is_matrix(matcode))
types[0] = MM_MTX_STR;
else
error=1;
/* check for CRD or ARR matrix */
if (mm_is_sparse(matcode))
types[1] = MM_SPARSE_STR;
else
if (mm_is_dense(matcode))
types[1] = MM_DENSE_STR;
else
return NULL;
/* check for element data type */
if (mm_is_real(matcode))
types[2] = MM_REAL_STR;
else
if (mm_is_complex(matcode))
types[2] = MM_COMPLEX_STR;
else
if (mm_is_pattern(matcode))
types[2] = MM_PATTERN_STR;
else
if (mm_is_integer(matcode))
types[2] = MM_INT_STR;
else
return NULL;
/* check for symmetry type */
if (mm_is_general(matcode))
types[3] = MM_GENERAL_STR;
else
if (mm_is_symmetric(matcode))
types[3] = MM_SYMM_STR;
else
if (mm_is_hermitian(matcode))
types[3] = MM_HERM_STR;
else
if (mm_is_skew(matcode))
types[3] = MM_SKEW_STR;
else
return NULL;
sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
return mm_strdup(buffer);
}

View file

@ -0,0 +1,135 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#ifndef MM_IO_H
#define MM_IO_H
#include <stdio.h>
#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
typedef char MM_typecode[4];
char *mm_typecode_to_str(MM_typecode matcode);
int mm_read_banner(FILE *f, MM_typecode *matcode);
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
int mm_write_banner(FILE *f, MM_typecode matcode);
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
int mm_write_mtx_array_size(FILE *f, int M, int N);
/********************* MM_typecode query fucntions ***************************/
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
#define mm_is_dense(typecode) ((typecode)[1]=='A')
#define mm_is_array(typecode) ((typecode)[1]=='A')
#define mm_is_complex(typecode) ((typecode)[2]=='C')
#define mm_is_real(typecode) ((typecode)[2]=='R')
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
#define mm_is_integer(typecode) ((typecode)[2]=='I')
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
#define mm_is_general(typecode) ((typecode)[3]=='G')
#define mm_is_skew(typecode) ((typecode)[3]=='K')
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
/********************* MM_typecode modify fucntions ***************************/
#define mm_set_matrix(typecode) ((*typecode)[0]='M')
#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
#define mm_set_array(typecode) ((*typecode)[1]='A')
#define mm_set_dense(typecode) mm_set_array(typecode)
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
#define mm_set_complex(typecode)((*typecode)[2]='C')
#define mm_set_real(typecode) ((*typecode)[2]='R')
#define mm_set_pattern(typecode)((*typecode)[2]='P')
#define mm_set_integer(typecode)((*typecode)[2]='I')
#define mm_set_symmetric(typecode)((*typecode)[3]='S')
#define mm_set_general(typecode)((*typecode)[3]='G')
#define mm_set_skew(typecode) ((*typecode)[3]='K')
#define mm_set_hermitian(typecode)((*typecode)[3]='H')
#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
(*typecode)[2]=' ',(*typecode)[3]='G')
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
/********************* Matrix Market error codes ***************************/
#define MM_COULD_NOT_READ_FILE 11
#define MM_PREMATURE_EOF 12
#define MM_NOT_MTX 13
#define MM_NO_HEADER 14
#define MM_UNSUPPORTED_TYPE 15
#define MM_LINE_TOO_LONG 16
#define MM_COULD_NOT_WRITE_FILE 17
/******************** Matrix Market internal definitions ********************
MM_matrix_typecode: 4-character sequence
ojbect sparse/ data storage
dense type scheme
string position: [0] [1] [2] [3]
Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
A(array) C(omplex) H(ermitian)
P(attern) S(ymmetric)
I(nteger) K(kew)
***********************************************************************/
#define MM_MTX_STR "matrix"
#define MM_ARRAY_STR "array"
#define MM_DENSE_STR "array"
#define MM_COORDINATE_STR "coordinate"
#define MM_SPARSE_STR "coordinate"
#define MM_COMPLEX_STR "complex"
#define MM_REAL_STR "real"
#define MM_INT_STR "integer"
#define MM_GENERAL_STR "general"
#define MM_SYMM_STR "symmetric"
#define MM_HERM_STR "hermitian"
#define MM_SKEW_STR "skew-symmetric"
#define MM_PATTERN_STR "pattern"
/* high level routines */
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
MM_typecode matcode);
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_);
#endif

View file

@ -0,0 +1,50 @@
#include <CL/cl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "ocl.h"
char* readFile(const char* fileName)
{
FILE* fp;
fp = fopen(fileName,"r");
if(fp == NULL)
{
printf("Error 1!\n");
exit(1);
}
fseek(fp,0,SEEK_END);
long size = ftell(fp);
rewind(fp);
char* buffer = (char*)malloc(sizeof(char)*(size+1));
if(buffer == NULL)
{
printf("Error 2!\n");
fclose(fp);
exit(1);
}
size_t res = fread(buffer,1,size,fp);
if(res != size)
{
printf("Error 3!\n");
fclose(fp);
exit(1);
}
buffer[size] = 0;
fclose(fp);
return buffer;
}
void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
{
cl_int clStatus;
char* temp = (char*)malloc(size);
memset(temp,val,size);
clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
free(temp);
}

View file

@ -0,0 +1,21 @@
#ifndef __OCLH__
#define __OCLH__
typedef struct {
cl_uint major;
cl_uint minor;
cl_uint multiProcessorCount;
} OpenCLDeviceProp;
void clMemSet(cl_command_queue, cl_mem, int, size_t);
char* readFile(const char*);
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
#endif

View file

@ -0,0 +1,427 @@
/*
* (c) 2007 The Board of Trustees of the University of Illinois.
*/
#include <parboil.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if _POSIX_VERSION >= 200112L
# include <sys/time.h>
#endif
/*****************************************************************************/
/* Timer routines */
static void
accumulate_time(pb_Timestamp *accum,
pb_Timestamp start,
pb_Timestamp end)
{
#if _POSIX_VERSION >= 200112L
*accum += end - start;
#else
# error "Timestamps not implemented for this system"
#endif
}
#if _POSIX_VERSION >= 200112L
static pb_Timestamp get_time()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
}
#else
# error "no supported time libraries are available on this platform"
#endif
void
pb_ResetTimer(struct pb_Timer *timer)
{
timer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
timer->elapsed = 0;
#else
# error "pb_ResetTimer: not implemented for this system"
#endif
}
void
pb_StartTimer(struct pb_Timer *timer)
{
if (timer->state != pb_Timer_STOPPED) {
fputs("Ignoring attempt to start a running timer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
}
void
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
{
unsigned int numNotStopped = 0x3; // 11
if (timer->state != pb_Timer_STOPPED) {
fputs("Warning: Timer was not stopped\n", stderr);
numNotStopped &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_STOPPED) {
fputs("Warning: Subtimer was not stopped\n", stderr);
numNotStopped &= 0x2; // Zero out 2^0
}
if (numNotStopped == 0x0) {
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_RUNNING;
subtimer->state = pb_Timer_RUNNING;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
if (numNotStopped & 0x2) {
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
if (numNotStopped & 0x1) {
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
}
}
#else
# error "pb_StartTimer: not implemented for this system"
#endif
}
void
pb_StopTimer(struct pb_Timer *timer)
{
pb_Timestamp fini;
if (timer->state != pb_Timer_RUNNING) {
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
}
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
pb_Timestamp fini;
unsigned int numNotRunning = 0x3; // 0b11
if (timer->state != pb_Timer_RUNNING) {
fputs("Warning: Timer was not running\n", stderr);
numNotRunning &= 0x1; // Zero out 2^1
}
if (subtimer->state != pb_Timer_RUNNING) {
fputs("Warning: Subtimer was not running\n", stderr);
numNotRunning &= 0x2; // Zero out 2^0
}
if (numNotRunning == 0x0) {
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
return;
}
timer->state = pb_Timer_STOPPED;
subtimer->state = pb_Timer_STOPPED;
#if _POSIX_VERSION >= 200112L
{
struct timeval tv;
gettimeofday(&tv, NULL);
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
}
#else
# error "pb_StopTimer: not implemented for this system"
#endif
if (numNotRunning & 0x2) {
accumulate_time(&timer->elapsed, timer->init, fini);
timer->init = fini;
}
if (numNotRunning & 0x1) {
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
subtimer->init = fini;
}
}
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer)
{
double ret;
if (timer->state != pb_Timer_STOPPED) {
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
}
#if _POSIX_VERSION >= 200112L
ret = timer->elapsed / 1e6;
#else
# error "pb_GetElapsedTime: not implemented for this system"
#endif
return ret;
}
void
pb_InitializeTimerSet(struct pb_TimerSet *timers)
{
int n;
timers->wall_begin = get_time();
timers->current = pb_TimerID_NONE;
timers->async_markers = NULL;
for (n = 0; n < pb_TimerID_LAST; n++) {
pb_ResetTimer(&timers->timers[n]);
timers->sub_timer_list[n] = NULL; // free first?
}
}
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
(sizeof(struct pb_SubTimer));
int len = strlen(label);
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
sprintf(subtimer->label, "%s\0", label);
pb_ResetTimer(&subtimer->timer);
subtimer->next = NULL;
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
if (subtimerlist == NULL) {
subtimerlist = (struct pb_SubTimerList *) malloc
(sizeof(struct pb_SubTimerList));
subtimerlist->subtimer_list = subtimer;
timers->sub_timer_list[pb_Category] = subtimerlist;
} else {
// Append to list
struct pb_SubTimer *element = subtimerlist->subtimer_list;
while (element->next != NULL) {
element = element->next;
}
element->next = subtimer;
}
}
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
{
// switchToSub( NULL, NONE
// switchToSub( NULL, some
// switchToSub( some, some
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
struct pb_Timer *topLevelToStop = NULL;
if (timers->current != category && timers->current != pb_TimerID_NONE) {
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
// NONE shouldn't have a timer associated with it, so exclude from branch
topLevelToStop = &timers->timers[timers->current];
}
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
if (timers->current != pb_TimerID_NONE) {
if (curr != NULL && topLevelToStop != NULL) {
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
} else if (curr != NULL) {
pb_StopTimer(&curr->timer);
} else {
pb_StopTimer(topLevelToStop);
}
}
subtimerlist = timers->sub_timer_list[category];
struct pb_SubTimer *subtimer = NULL;
if (label != NULL) {
subtimer = subtimerlist->subtimer_list;
while (subtimer != NULL) {
if (strcmp(subtimer->label, label) == 0) {
break;
} else {
subtimer = subtimer->next;
}
}
}
if (category != pb_TimerID_NONE) {
if (subtimerlist != NULL) {
subtimerlist->current = subtimer;
}
if (category != timers->current && subtimer != NULL) {
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
} else if (subtimer != NULL) {
// Same category, different non-NULL subtimer
pb_StartTimer(&subtimer->timer);
} else{
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
pb_StartTimer(&timers->timers[category]);
}
}
timers->current = category;
}
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
{
/* Stop the currently running timer */
/*if (timers->current != pb_TimerID_NONE) {
struct pb_SubTimer *currSubTimer = NULL;
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
if ( subtimerlist != NULL) {
currSubTimer = timers->sub_timer_list[timers->current]->current;
}
if ( currSubTimer!= NULL) {
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
} else {
pb_StopTimer(&timers->timers[timers->current]);
}
}
timers->current = timer;
if (timer != pb_TimerID_NONE) {
pb_StartTimer(&timers->timers[timer]);
}*/
}
void
pb_PrintTimerSet(struct pb_TimerSet *timers)
{
pb_Timestamp wall_end = get_time();
struct pb_Timer *t = timers->timers;
struct pb_SubTimer* sub = NULL;
int maxSubLength;
const char *categories[] = {
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
};
const int maxCategoryLength = 10;
int i;
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
if(pb_GetElapsedTime(&t[i]) != 0) {
// Print Category Timer
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
if (timers->sub_timer_list[i] != NULL) {
sub = timers->sub_timer_list[i]->subtimer_list;
maxSubLength = 0;
while (sub != NULL) {
// Find longest SubTimer label
if (strlen(sub->label) > maxSubLength) {
maxSubLength = strlen(sub->label);
}
sub = sub->next;
}
// Fit to Categories
if (maxSubLength <= maxCategoryLength) {
maxSubLength = maxCategoryLength;
}
sub = timers->sub_timer_list[i]->subtimer_list;
// Print SubTimers
while (sub != NULL) {
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
sub = sub->next;
}
}
}
}
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
float walltime = (wall_end - timers->wall_begin)/ 1e6;
printf("Timer Wall Time: %f\n", walltime);
}
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
{
/* clean up all of the async event markers */
struct pb_async_time_marker_list ** event = &(timers->async_markers);
while( *event != NULL) {
struct pb_async_time_marker_list ** next = &((*event)->next);
free(*event);
(*event) = NULL;
event = next;
}
int i = 0;
for(i = 0; i < pb_TimerID_LAST; ++i) {
if (timers->sub_timer_list[i] != NULL) {
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
struct pb_SubTimer *prev = NULL;
while (subtimer != NULL) {
free(subtimer->label);
prev = subtimer;
subtimer = subtimer->next;
free(prev);
}
free(timers->sub_timer_list[i]);
}
}
}

View file

@ -0,0 +1,348 @@
/*
* (c) 2010 The Board of Trustees of the University of Illinois.
*/
#ifndef PARBOIL_HEADER
#define PARBOIL_HEADER
#include <stdio.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#include <unistd.h>
/* A platform as specified by the user on the command line */
struct pb_PlatformParam {
char *name; /* The platform name. This string is owned. */
char *version; /* The platform version; may be NULL.
* This string is owned. */
};
/* Create a PlatformParam from the given strings.
* 'name' must not be NULL. 'version' may be NULL.
* If not NULL, the strings should have been allocated by malloc(),
* and they will be owned by the returned object.
*/
struct pb_PlatformParam *
pb_PlatformParam(char *name, char *version);
void
pb_FreePlatformParam(struct pb_PlatformParam *);
/* A criterion for how to select a device */
enum pb_DeviceSelectionCriterion {
pb_Device_INDEX, /* Enumerate the devices and select one
* by its number */
pb_Device_CPU, /* Select a CPU device */
pb_Device_GPU, /* Select a GPU device */
pb_Device_ACCELERATOR, /* Select an accelerator device */
pb_Device_NAME /* Select a device by name */
};
/* A device as specified by the user on the command line */
struct pb_DeviceParam {
enum pb_DeviceSelectionCriterion criterion;
union {
int index; /* If criterion == pb_Device_INDEX,
* the index of the device */
char *name; /* If criterion == pb_Device_NAME,
* the name of the device.
* This string is owned. */
};
};
struct pb_DeviceParam *
pb_DeviceParam_index(int index);
struct pb_DeviceParam *
pb_DeviceParam_cpu(void);
struct pb_DeviceParam *
pb_DeviceParam_gpu(void);
struct pb_DeviceParam *
pb_DeviceParam_accelerator(void);
/* Create a by-name device selection criterion.
* The string should have been allocated by malloc(), and it will will be
* owned by the returned object.
*/
struct pb_DeviceParam *
pb_DeviceParam_name(char *name);
void
pb_FreeDeviceParam(struct pb_DeviceParam *);
/* Command line parameters for benchmarks */
struct pb_Parameters {
char *outFile; /* If not NULL, the raw output of the
* computation should be saved to this
* file. The string is owned. */
char **inpFiles; /* A NULL-terminated array of strings
* holding the input file(s) for the
* computation. The array and strings
* are owned. */
struct pb_PlatformParam *platform; /* If not NULL, the platform
* specified on the command line. */
struct pb_DeviceParam *device; /* If not NULL, the device
* specified on the command line. */
};
/* Read command-line parameters.
*
* The argc and argv parameters to main are read, and any parameters
* interpreted by this function are removed from the argument list.
*
* A new instance of struct pb_Parameters is returned.
* If there is an error, then an error message is printed on stderr
* and NULL is returned.
*/
struct pb_Parameters *
pb_ReadParameters(int *_argc, char **argv);
/* Free an instance of struct pb_Parameters.
*/
void
pb_FreeParameters(struct pb_Parameters *p);
void
pb_FreeStringArray(char **);
/* Count the number of input files in a pb_Parameters instance.
*/
int
pb_Parameters_CountInputs(struct pb_Parameters *p);
/* A time or duration. */
//#if _POSIX_VERSION >= 200112L
typedef unsigned long long pb_Timestamp; /* time in microseconds */
//#else
//# error "Timestamps not implemented"
//#endif
enum pb_TimerState {
pb_Timer_STOPPED,
pb_Timer_RUNNING,
};
struct pb_Timer {
enum pb_TimerState state;
pb_Timestamp elapsed; /* Amount of time elapsed so far */
pb_Timestamp init; /* Beginning of the current time interval,
* if state is RUNNING. End of the last
* recorded time interfal otherwise. */
};
/* Reset a timer.
* Use this to initialize a timer or to clear
* its elapsed time. The reset timer is stopped.
*/
void
pb_ResetTimer(struct pb_Timer *timer);
/* Start a timer. The timer is set to RUNNING mode and
* time elapsed while the timer is running is added to
* the timer.
* The timer should not already be running.
*/
void
pb_StartTimer(struct pb_Timer *timer);
/* Stop a timer.
* This stops adding elapsed time to the timer.
* The timer should not already be stopped.
*/
void
pb_StopTimer(struct pb_Timer *timer);
/* Get the elapsed time in seconds. */
double
pb_GetElapsedTime(struct pb_Timer *timer);
/* Execution time is assigned to one of these categories. */
enum pb_TimerID {
pb_TimerID_NONE = 0,
pb_TimerID_IO, /* Time spent in input/output */
pb_TimerID_KERNEL, /* Time spent computing on the device,
* recorded asynchronously */
pb_TimerID_COPY, /* Time spent synchronously moving data
* to/from device and allocating/freeing
* memory on the device */
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
* driver, primarily for recording the time
* spent queueing asynchronous operations */
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
pb_TimerID_COMPUTE, /* Time for all program execution other
* than parsing command line arguments,
* I/O, kernel, and copy */
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
* host activity: automatically filled in,
* not intended for direct usage */
pb_TimerID_LAST /* Number of timer IDs */
};
/* Dynamic list of asynchronously tracked times between events */
struct pb_async_time_marker_list {
char *label; // actually just a pointer to a string
enum pb_TimerID timerID; /* The ID to which the interval beginning
* with this marker should be attributed */
void * marker;
//cudaEvent_t marker; /* The driver event for this marker */
struct pb_async_time_marker_list *next;
};
struct pb_SubTimer {
char *label;
struct pb_Timer timer;
struct pb_SubTimer *next;
};
struct pb_SubTimerList {
struct pb_SubTimer *current;
struct pb_SubTimer *subtimer_list;
};
/* A set of timers for recording execution times. */
struct pb_TimerSet {
enum pb_TimerID current;
struct pb_async_time_marker_list* async_markers;
pb_Timestamp async_begin;
pb_Timestamp wall_begin;
struct pb_Timer timers[pb_TimerID_LAST];
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
};
/* Reset all timers in the set. */
void
pb_InitializeTimerSet(struct pb_TimerSet *timers);
void
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
/* Select which timer the next interval of time should be accounted
* to. The selected timer is started and other timers are stopped.
* Using pb_TimerID_NONE stops all timers. */
void
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
void
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
/* Print timer values to standard output. */
void
pb_PrintTimerSet(struct pb_TimerSet *timers);
/* Release timer resources */
void
pb_DestroyTimerSet(struct pb_TimerSet * timers);
void
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
typedef struct pb_Device_tag {
char* name;
void* clDevice;
int id;
unsigned int in_use;
unsigned int available;
} pb_Device;
struct pb_Context_tag;
typedef struct pb_Context_tag pb_Context;
typedef struct pb_Platform_tag {
char* name;
char* version;
void* clPlatform;
unsigned int in_use;
pb_Context** contexts;
pb_Device** devices;
} pb_Platform;
struct pb_Context_tag {
void* clPlatformId;
void* clContext;
void* clDeviceId;
pb_Platform* pb_platform;
pb_Device* pb_device;
};
// verbosely print out list of platforms and their devices to the console.
pb_Platform**
pb_GetPlatforms();
// Choose a platform according to the given platform specification
pb_Platform*
pb_GetPlatform(struct pb_PlatformParam *platform);
// choose a platform: by name, name & version
pb_Platform*
pb_GetPlatformByName(const char* name);
pb_Platform*
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
// Choose a device according to the given device specification
pb_Device*
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
pb_Device**
pb_GetDevices(pb_Platform* pb_platform);
// choose a device by name.
pb_Device*
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
pb_Platform*
pb_GetPlatformByEnvVars();
pb_Context*
pb_InitOpenCLContext(struct pb_Parameters* parameters);
void
pb_ReleasePlatforms();
void
pb_ReleaseContext(pb_Context* c);
void
pb_PrintPlatformInfo(pb_Context* c);
void
perf_init();
//#define MEASURE_KERNEL_TIME
#include <CL/cl.h>
#ifdef MEASURE_KERNEL_TIME
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
cl_int
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#endif
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
void pb_sig_float(char*, float*, int);
void pb_sig_double(char*, double*, int);
void pb_sig_short(char*, short*, int);
void pb_sig_int(char*, int*, int);
void pb_sig_uchar(char*, unsigned char*, unsigned int);
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
#ifdef __cplusplus
}
#endif
#endif //PARBOIL_HEADER

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,670 @@
/*
* perf_util.c - helper functions for perf_events
*
* Copyright (c) 2009 Google, Inc
* Contributed by Stephane Eranian <eranian@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <err.h>
#include <perfmon/pfmlib_perf_event.h>
#include "perf_util.h"
/* the **fd parameter must point to a null pointer on the first call
* max_fds and num_fds must both point to a zero value on the first call
* The return value is success (0) vs. failure (non-zero)
*/
int
perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
{
perf_event_desc_t *fd;
pfm_perf_encode_arg_t arg;
int new_max, ret, num, max_fds;
int group_leader;
if (!(argv && fds && num_fds))
return -1;
fd = *fds;
if (fd) {
max_fds = fd[0].max_fds;
if (max_fds < 2)
return -1;
num = *num_fds;
} else {
max_fds = num = 0; /* bootstrap */
}
group_leader = num;
while(*argv) {
if (num == max_fds) {
if (max_fds == 0)
new_max = 2;
else
new_max = max_fds << 1;
if (new_max < max_fds) {
warn("too many entries");
goto error;
}
fd = realloc(fd, new_max * sizeof(*fd));
if (!fd) {
warn("cannot allocate memory");
goto error;
}
/* reset newly allocated chunk */
memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
max_fds = new_max;
/* update max size */
fd[0].max_fds = max_fds;
}
/* ABI compatibility, set before calling libpfm */
fd[num].hw.size = sizeof(fd[num].hw);
memset(&arg, 0, sizeof(arg));
arg.attr = &fd[num].hw;
arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */
ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &arg);
if (ret != PFM_SUCCESS) {
warnx("event %s: %s", *argv, pfm_strerror(ret));
goto error;
}
fd[num].name = strdup(*argv);
fd[num].group_leader = group_leader;
fd[num].idx = arg.idx;
num++;
argv++;
}
*num_fds = num;
*fds = fd;
return 0;
error:
perf_free_fds(fd, num);
return -1;
}
int
perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
{
const char **argv;
char *p, *q, *events;
int i, ret, num = 0;
if (!(ev && fd && num_fds))
return -1;
events = strdup(ev);
if (!events)
return -1;
q = events;
while((p = strchr(q, ','))) {
num++;
q = p + 1;
}
num++;
num++; /* terminator */
argv = malloc(num * sizeof(char *));
if (!argv) {
free(events);
return -1;
}
for(i=0, q = events; i < num-2; i++, q = p + 1) {
p = strchr(q, ',');
*p = '\0';
argv[i] = q;
}
argv[i++] = q;
argv[i] = NULL;
ret = perf_setup_argv_events(argv, fd, num_fds);
free(argv);
free(events); /* strdup in perf_setup_argv_events() */
return ret;
}
void
perf_free_fds(perf_event_desc_t *fds, int num_fds)
{
int i;
for (i = 0 ; i < num_fds; i++) {
free(fds[i].name);
free(fds[i].fstr);
}
free(fds);
}
int
perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
{
int leader;
int i;
if (idx < 0 || idx >= num)
return 0;
leader = fds[idx].group_leader;
for (i = leader + 1; i < num; i++) {
if (fds[i].group_leader != leader) {
/* This is a new group leader, so the previous
* event was the final event of the preceding
* group.
*/
return i - leader;
}
}
return i - leader;
}
int
perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz)
{
struct perf_event_mmap_page *hdr = hw->buf;
size_t pgmsk = hw->pgmsk;
void *data;
unsigned long tail;
size_t avail_sz, m, c;
/*
* data points to beginning of buffer payload
*/
data = ((void *)hdr)+sysconf(_SC_PAGESIZE);
/*
* position of tail within the buffer payload
*/
tail = hdr->data_tail & pgmsk;
/*
* size of what is available
*
* data_head, data_tail never wrap around
*/
avail_sz = hdr->data_head - hdr->data_tail;
if (sz > avail_sz)
return -1;
/*
* sz <= avail_sz, we can satisfy the request
*/
/*
* c = size till end of buffer
*
* buffer payload size is necessarily
* a power of two, so we can do:
*/
c = pgmsk + 1 - tail;
/*
* min with requested size
*/
m = c < sz ? c : sz;
/* copy beginning */
memcpy(buf, data+tail, m);
/*
* copy wrapped around leftover
*/
if ((sz - m) > 0)
memcpy(buf+m, data, sz - m);
//printf("\nhead=%lx tail=%lx new_tail=%lx sz=%zu\n", hdr->data_head, hdr->data_tail, hdr->data_tail+sz, sz);
hdr->data_tail += sz;
return 0;
}
void
perf_skip_buffer(perf_event_desc_t *hw, size_t sz)
{
struct perf_event_mmap_page *hdr = hw->buf;
if ((hdr->data_tail + sz) > hdr->data_head)
sz = hdr->data_head - hdr->data_tail;
hdr->data_tail += sz;
}
static size_t
__perf_handle_raw(perf_event_desc_t *hw)
{
size_t sz = 0;
uint32_t raw_sz, i;
char *buf;
int ret;
ret = perf_read_buffer_32(hw, &raw_sz);
if (ret) {
warnx("cannot read raw size");
return -1;
}
sz += sizeof(raw_sz);
printf("\n\tRAWSZ:%u\n", raw_sz);
buf = malloc(raw_sz);
if (!buf) {
warn("cannot allocate raw buffer");
return -1;
}
ret = perf_read_buffer(hw, buf, raw_sz);
if (ret) {
warnx("cannot read raw data");
free(buf);
return -1;
}
if (raw_sz)
putchar('\t');
for(i=0; i < raw_sz; i++) {
printf("0x%02x ", buf[i] & 0xff );
if (((i+1) % 16) == 0)
printf("\n\t");
}
if (raw_sz)
putchar('\n');
free(buf);
return sz + raw_sz;
}
int
perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp)
{
perf_event_desc_t *hw;
struct { uint32_t pid, tid; } pid;
struct { uint64_t value, id; } grp;
uint64_t time_enabled, time_running;
size_t sz;
uint64_t type, fmt;
uint64_t val64;
const char *str;
int ret, e;
if (!fds || !fp || !ehdr || num_fds < 0 || idx < 0 || idx >= num_fds)
return -1;
sz = ehdr->size - sizeof(*ehdr);
hw = fds+idx;
type = hw->hw.sample_type;
fmt = hw->hw.read_format;
/*
* the sample_type information is laid down
* based on the PERF_RECORD_SAMPLE format specified
* in the perf_event.h header file.
* That order is different from the enum perf_event_sample_format
*/
if (type & PERF_SAMPLE_IP) {
const char *xtra = " ";
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx("cannot read IP");
return -1;
}
/*
* MISC_EXACT_IP indicates that kernel is returning
* th IIP of an instruction which caused the event, i.e.,
* no skid
*/
if (hw->hw.precise_ip && (ehdr->misc & PERF_RECORD_MISC_EXACT_IP))
xtra = " (exact) ";
fprintf(fp, "IIP:%#016"PRIx64"%s", val64, xtra);
sz -= sizeof(val64);
}
if (type & PERF_SAMPLE_TID) {
ret = perf_read_buffer(hw, &pid, sizeof(pid));
if (ret) {
warnx( "cannot read PID");
return -1;
}
fprintf(fp, "PID:%d TID:%d ", pid.pid, pid.tid);
sz -= sizeof(pid);
}
if (type & PERF_SAMPLE_TIME) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read time");
return -1;
}
fprintf(fp, "TIME:%'"PRIu64" ", val64);
sz -= sizeof(val64);
}
if (type & PERF_SAMPLE_ADDR) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read addr");
return -1;
}
fprintf(fp, "ADDR:%#016"PRIx64" ", val64);
sz -= sizeof(val64);
}
if (type & PERF_SAMPLE_ID) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read id");
return -1;
}
fprintf(fp, "ID:%"PRIu64" ", val64);
sz -= sizeof(val64);
}
if (type & PERF_SAMPLE_STREAM_ID) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read stream_id");
return -1;
}
fprintf(fp, "STREAM_ID:%"PRIu64" ", val64);
sz -= sizeof(val64);
}
if (type & PERF_SAMPLE_CPU) {
struct { uint32_t cpu, reserved; } cpu;
ret = perf_read_buffer(hw, &cpu, sizeof(cpu));
if (ret) {
warnx( "cannot read cpu");
return -1;
}
fprintf(fp, "CPU:%u ", cpu.cpu);
sz -= sizeof(cpu);
}
if (type & PERF_SAMPLE_PERIOD) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read period");
return -1;
}
fprintf(fp, "PERIOD:%'"PRIu64" ", val64);
sz -= sizeof(val64);
}
/* struct read_format {
* { u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 id; } && PERF_FORMAT_ID
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
*/
if (type & PERF_SAMPLE_READ) {
uint64_t values[3];
uint64_t nr;
if (fmt & PERF_FORMAT_GROUP) {
ret = perf_read_buffer_64(hw, &nr);
if (ret) {
warnx( "cannot read nr");
return -1;
}
sz -= sizeof(nr);
time_enabled = time_running = 1;
if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) {
ret = perf_read_buffer_64(hw, &time_enabled);
if (ret) {
warnx( "cannot read timing info");
return -1;
}
sz -= sizeof(time_enabled);
}
if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) {
ret = perf_read_buffer_64(hw, &time_running);
if (ret) {
warnx( "cannot read timing info");
return -1;
}
sz -= sizeof(time_running);
}
fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64" NR=%"PRIu64"\n", time_enabled, time_running, nr);
values[1] = time_enabled;
values[2] = time_running;
while(nr--) {
grp.id = -1;
ret = perf_read_buffer_64(hw, &grp.value);
if (ret) {
warnx( "cannot read group value");
return -1;
}
sz -= sizeof(grp.value);
if (fmt & PERF_FORMAT_ID) {
ret = perf_read_buffer_64(hw, &grp.id);
if (ret) {
warnx( "cannot read leader id");
return -1;
}
sz -= sizeof(grp.id);
}
e = perf_id2event(fds, num_fds, grp.id);
if (e == -1)
str = "unknown sample event";
else
str = fds[e].name;
values[0] = grp.value;
grp.value = perf_scale(values);
fprintf(fp, "\t%'"PRIu64" %s (%"PRIu64"%s)\n",
grp.value, str,
grp.id,
time_running != time_enabled ? ", scaled":"");
}
} else {
/*
* this program does not use FORMAT_GROUP when there is only one event
*/
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read value");
return -1;
}
sz -= sizeof(val64);
if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) {
ret = perf_read_buffer_64(hw, &time_enabled);
if (ret) {
warnx( "cannot read timing info");
return -1;
}
sz -= sizeof(time_enabled);
}
if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) {
ret = perf_read_buffer_64(hw, &time_running);
if (ret) {
warnx( "cannot read timing info");
return -1;
}
sz -= sizeof(time_running);
}
if (fmt & PERF_FORMAT_ID) {
ret = perf_read_buffer_64(hw, &val64);
if (ret) {
warnx( "cannot read leader id");
return -1;
}
sz -= sizeof(val64);
}
fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64"\n", time_enabled, time_running);
values[0] = val64;
values[1] = time_enabled;
values[2] = time_running;
val64 = perf_scale(values);
fprintf(fp, "\t%'"PRIu64" %s %s\n",
val64, fds[0].name,
time_running != time_enabled ? ", scaled":"");
}
}
if (type & PERF_SAMPLE_CALLCHAIN) {
uint64_t nr, ip;
ret = perf_read_buffer_64(hw, &nr);
if (ret) {
warnx( "cannot read callchain nr");
return -1;
}
sz -= sizeof(nr);
while(nr--) {
ret = perf_read_buffer_64(hw, &ip);
if (ret) {
warnx( "cannot read ip");
return -1;
}
sz -= sizeof(ip);
fprintf(fp, "\t0x%"PRIx64"\n", ip);
}
}
if (type & PERF_SAMPLE_RAW) {
ret = __perf_handle_raw(hw);
if (ret == -1)
return -1;
sz -= ret;
}
/*
* if we have some data left, it is because there is more
* than what we know about. In fact, it is more complicated
* because we may have the right size but wrong layout. But
* that's the best we can do.
*/
if (sz) {
warnx("did not correctly parse sample leftover=%zu", sz);
perf_skip_buffer(hw, sz);
}
fputc('\n',fp);
return 0;
}
uint64_t
display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp)
{
struct { uint64_t id, lost; } lost;
const char *str;
int e, ret;
ret = perf_read_buffer(hw, &lost, sizeof(lost));
if (ret) {
warnx("cannot read lost info");
return 0;
}
e = perf_id2event(fds, num_fds, lost.id);
if (e == -1)
str = "unknown lost event";
else
str = fds[e].name;
fprintf(fp, "<<<LOST %"PRIu64" SAMPLES FOR EVENT %s>>>\n",
lost.lost,
str);
return lost.lost;
}
void
display_exit(perf_event_desc_t *hw, FILE *fp)
{
struct { pid_t pid, ppid, tid, ptid; } grp;
int ret;
ret = perf_read_buffer(hw, &grp, sizeof(grp));
if (ret) {
warnx("cannot read exit info");
return;
}
fprintf(fp,"[%d] exited\n", grp.pid);
}
void
display_freq(int mode, perf_event_desc_t *hw, FILE *fp)
{
struct { uint64_t time, id, stream_id; } thr;
int ret;
ret = perf_read_buffer(hw, &thr, sizeof(thr));
if (ret) {
warnx("cannot read throttling info");
return;
}
fprintf(fp, "%s value=%"PRIu64" event ID=%"PRIu64"\n",
mode ? "Throttled" : "Unthrottled",
thr.id,
thr.stream_id);
}

View file

@ -0,0 +1,184 @@
/*
* perf_util.h - helper functions for perf_events
*
* Copyright (c) 2009 Google, Inc
* Contributed by Stephane Eranian <eranian@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __PERF_UTIL_H__
#define __PERF_UTIL_H__
#include <sys/types.h>
#include <inttypes.h>
//#include <err.h>
//#include <perfmon/pfmlib_perf_event.h>
typedef struct {
//struct perf_event_attr hw;
uint64_t values[3];
uint64_t prev_values[3];
char *name;
uint64_t id; /* event id kernel */
void *buf;
size_t pgmsk;
int group_leader;
int fd;
int max_fds;
int idx; /* opaque libpfm event identifier */
char *fstr; /* fstr from library, must be freed */
} perf_event_desc_t;
/* handy shortcut */
#define PERF_FORMAT_SCALE (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
#ifdef __cplusplus
extern "C" {
#endif
extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, int *num_fds);
extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, int *num_fds);
extern int perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz);
extern void perf_free_fds(perf_event_desc_t *fds, int num_fds);
extern void perf_skip_buffer(perf_event_desc_t *hw, size_t sz);
extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader);
extern int perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp);
extern uint64_t display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp);
extern void display_exit(perf_event_desc_t *hw, FILE *fp);
extern void display_freq(int mode, perf_event_desc_t *hw, FILE *fp);
#ifdef __cplusplus
};
#endif
static inline int
perf_read_buffer_32(perf_event_desc_t *hw, void *buf)
{
return perf_read_buffer(hw, buf, sizeof(uint32_t));
}
static inline int
perf_read_buffer_64(perf_event_desc_t *hw, void *buf)
{
return perf_read_buffer(hw, buf, sizeof(uint64_t));
}
/*
* values[0] = raw count
* values[1] = TIME_ENABLED
* values[2] = TIME_RUNNING
*/
static inline uint64_t
perf_scale(uint64_t *values)
{
uint64_t res = 0;
if (!values[2] && !values[1] && values[0]) {
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
}
if (values[2] > values[1]) {
//warnx("WARNING: time_running > time_enabled: %llu vs. %llu\n", values[2], values[1]);
}
if (values[2])
res = (uint64_t)((double)values[0] * values[1]/values[2]);
return res;
}
static inline uint64_t
perf_scale_valid(uint64_t *values, int* valid)
{
uint64_t res = 0;
if (!values[2] && !values[1] && values[0]) {
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
*valid = 0;
}
if (values[2] > values[1]) {
//warnx("WARNING: time_running > time_enabled: %llu vs. %llu\n", values[2], values[1]);
*valid = 0;
} else {
*valid = 1;
}
if (values[2])
res = (uint64_t)((double)values[0] * values[1]/values[2]);
return res;
}
static inline uint64_t
perf_scale_delta(uint64_t *values, uint64_t *prev_values)
{
uint64_t res = 0;
if (!values[2] && !values[1] && values[0])
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
if (values[2] > values[1])
//warnx("WARNING: time_running > time_enabled\n");
if (values[2] - prev_values[2])
res = (uint64_t)((double)((values[0] - prev_values[0]) * (values[1] - prev_values[1])/ (values[2] - prev_values[2])));
return res;
}
/*
* TIME_RUNNING/TIME_ENABLED
*/
static inline double
perf_scale_ratio(uint64_t *values)
{
if (!values[1])
return 0.0;
return values[2]*1.0/values[1];
}
static inline int
perf_fd2event(perf_event_desc_t *fds, int num_events, int fd)
{
int i;
for(i=0; i < num_events; i++)
if (fds[i].fd == fd)
return i;
return -1;
}
/*
* id = PERF_FORMAT_ID
*/
static inline int
perf_id2event(perf_event_desc_t *fds, int num_events, uint64_t id)
{
int j;
for(j=0; j < num_events; j++)
if (fds[j].id == id)
return j;
return -1;
}
static inline int
perf_is_group_leader(perf_event_desc_t *fds, int idx)
{
return fds[idx].group_leader == idx;
}
#endif

View file

@ -0,0 +1,281 @@
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include <dirent.h>
#include "perfmon.h"
static const char* mxpa_profile_log = "mxpa_profile_%d.log";
static int enabled = 0;
// quartet's CPU supports these. Check check_events at libpfm4
// to make this adaptable.
static const char *gen_events_all[]={
"snb_ep::L3_LAT_CACHE:MISS",
"snb_ep::L3_LAT_CACHE:REFERENCE",
"snb_ep::L2_RQSTS:ALL_DEMAND_DATA_RD",
"snb_ep::L2_RQSTS:ALL_DEMAND_RD_HIT",
"perf::PERF_COUNT_HW_CACHE_L1D:ACCESS",
"perf::PERF_COUNT_HW_CACHE_L1D:MISS",
"perf::PERF_COUNT_HW_CACHE_L1D:PREFETCH",
"perf::L1-DCACHE-PREFETCH-MISSES",
"perf::PERF_COUNT_HW_CACHE_L1I:READ",
"perf::PERF_COUNT_HW_CACHE_L1I:MISS",
"perf::ITLB-LOADS",
"perf::ITLB-LOAD-MISSES",
"perf::DTLB-LOADS",
"perf::DTLB-LOAD-MISSES",
"perf::CONTEXT-SWITCHES",
"perf::CPU-MIGRATIONS",
"perf::CYCLES",
"snb_ep::RESOURCE_STALLS:ANY",
"perf::INSTRUCTIONS",
"perf::BRANCH-INSTRUCTIONS",
"perf::BRANCHES",
"perf::BRANCH-MISSES",
NULL
};
#define NUM_MAX_THREAD 256
static perf_event_desc_t *g_fds[NUM_MAX_THREAD];
static int g_nthreads;
static int num_fds = 0;
/* note: unsafe for multithreading */
static uint64_t* begins;
static void
fetch_counts(perf_event_desc_t *fds, int num_fds)
{
if (begins == 0) {
begins = (uint64_t*) malloc(num_fds * sizeof(uint64_t));
memset(begins, 0, num_fds * sizeof(uint64_t));
}
uint64_t val;
uint64_t values[3];
double ratio;
int i;
ssize_t ret;
/*
* now read the results. We use pfp_event_count because
* libpfm guarantees that counters for the events always
* come first.
*/
memset(values, 0, sizeof(values));
for (i = 0; i < num_fds; i++) {
ret = read(fds[i].fd, values, sizeof(values));
if (ret < (ssize_t)sizeof(values)) {
if (ret == -1)
fprintf(stderr, "cannot read results: %s", strerror(errno));
else
warnx("could not read event%d", i);
}
/*
* scaling is systematic because we may be sharing the PMU and
* thus may be multiplexed
*/
int valid = 0;
val = perf_scale_valid(values, &valid);
if (valid == 0) printf ("@i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
ratio = perf_scale_ratio(values);
begins[i] = val;
}
}
static void
print_counts(perf_event_desc_t *fds, int num_fds, const char *msg, FILE* fp)
{
uint64_t val;
uint64_t values[3];
double ratio;
int i;
ssize_t ret;
#if 0
fprintf(fp, "%s ------------------------------------\n", msg);
#else
fprintf(fp, "method=%s", msg);
#endif
/*
* now read the results. We use pfp_event_count because
* libpfm guarantees that counters for the events always
* come first.
*/
memset(values, 0, sizeof(values));
for (i = 0; i < num_fds; i++) {
ret = read(fds[i].fd, values, sizeof(values));
if (ret < (ssize_t)sizeof(values)) {
if (ret == -1)
fprintf(stderr, "cannot read results: %s", strerror(errno));
else
warnx("could not read event%d", i);
}
/*
* scaling is systematic because we may be sharing the PMU and
* thus may be multiplexed
*/
int valid;
val = perf_scale_valid(values, &valid);
if (valid == 0) printf ("!i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
ratio = perf_scale_ratio(values);
#if 0
fprintf(fp, "%s %'20"PRIu64" %s (%.2f%% scaling, raw=%'"PRIu64", ena=%'"PRIu64", run=%'"PRIu64")\n",
"-", // msg,
val,
fds[i].name,
(1.0-ratio)*100.0,
values[0],
values[1],
values[2]);
#else
fprintf (fp, " %s=%llu", fds[i].name, val); // valid ? val : 0);
#endif
}
fprintf (fp, "\n");
}
FILE* open_log_file(char* fname) {
FILE* fp;
if (fp = fopen(fname, "r")) {
fclose(fp);
return fopen(fname, "a");
}
fp = fopen(fname, "a");
return fp;
}
void perf_init() {
static int init = 0;
if (init) return;
init = 1;
char* prof_envvar = getenv("MXPA_PROFILE");
if (prof_envvar) {
enabled = 1;
} else {
return;
}
pfm_initialize();
}
static void get_tids(int* tids, int* number) {
char path[32];
int pid = getpid();
sprintf (path, "/proc/%d/task", pid);
struct dirent *de=NULL;
DIR *d=NULL;
d=opendir(path);
assert(d != NULL && "Null for opendir");
// Loop while not NULL
char pid_str[8];
char last[8];
sprintf (pid_str, "%d", pid);
int n = 0;
while(de = readdir(d)) {
if (!strcmp(de->d_name, ".")) continue;
if (!strcmp(de->d_name, "..")) continue;
if (!strcmp(de->d_name, pid_str)) continue;
*tids++ = atoi(de->d_name);
n++;
}
*number = n;
// printf ("Sampling thread %d\n", tid);
closedir(d);
}
void perf_start(const char* kname) {
if (!enabled) return;
char* prof_envvar = getenv("MXPA_PROFILE");
int tids[32];
int ntid;
get_tids(tids, &ntid);
g_nthreads = ntid;
int n;
for (n = 0; n < ntid; n++) {
int ret;
ret = perf_setup_list_events(prof_envvar, &(g_fds[n]), &num_fds);
perf_event_desc_t *fds = g_fds[n];
int cpu = -1;
int group_fd = -1;
int pid = tids[n];
fds[0].fd = -1;
int i;
for(i=0; i < num_fds; i++) {
fds[i].hw.read_format = PERF_FORMAT_SCALE;
fds[i].hw.disabled = 1; /* do not start now */
fds[i].hw.inherit = 1; /* XXX child process will inherit, when forked only? */
/* each event is in an independent group (multiplexing likely) */
fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0);
if (fds[i].fd == -1) {
fprintf(stderr, "cannot open event %d\n", i);
exit(2);
}
}
}
prctl(PR_TASK_PERF_EVENTS_ENABLE);
}
void perf_end(const char* kname) {
if (!enabled) return;
int i, n;
prctl(PR_TASK_PERF_EVENTS_DISABLE);
static int first_time = 1;
if (first_time) {
first_time = 0;
char name[128];
for (n = 0; n < g_nthreads; n++) {
sprintf (name, mxpa_profile_log, n);
FILE* fp = fopen(name, "w");
fclose(fp);
}
}
char name[128];
for (n = 0; n < g_nthreads; n++) {
sprintf (name, mxpa_profile_log, n);
FILE* fp = open_log_file(name);
perf_event_desc_t *fds = g_fds[n];
print_counts(fds, num_fds, kname, fp);
for (i = 0; i < num_fds; i++) close(fds[i].fd);
perf_free_fds(fds, num_fds);
g_fds[n] = fds = NULL;
fclose(fp);
}
}
void pin_trace_enable(char* n) {
perf_start((const char*)n);
}
void pin_trace_disable(char* n) {
perf_end((const char*)n);
}

View file

@ -0,0 +1,20 @@
#ifndef MXPA_RUNTIME_PERF_MONITOR
#define MXPA_RUNTIME_PERF_MONITOR
#include "perf_util.h"
#ifdef __cplusplus
extern "C" {
#endif
void perf_init();
void perf_start(const char* kname);
void perf_end(const char* kname);
void perf_fini();
#ifdef __cplusplus
};
#endif
#endif

View file

@ -0,0 +1,46 @@
#ifdef __MXPA__
#include <cstdio>
#include <cstdlib>
#include <sys/types.h>
#include <unistd.h>
#include <tbb/tbb.h>
namespace {
tbb::task_scheduler_init init;
class Foo {
public:
Foo() {}
void operator() (const tbb::blocked_range<size_t>& r) const {
for (size_t i = r.begin(); i != r.end(); i++) {
printf ("");
}
}
};
}
extern "C"
void
mxpa_scheduler_init() {
tbb::parallel_for(tbb::blocked_range<size_t>(0, 100), Foo());
#if 0
char cmd[32];
int pid = getpid();
printf("----------\n");
sprintf(cmd, "ls -1 /proc/%d/task", pid);
system(cmd);
printf("----------\n");
#endif
}
#else
extern "C"
void
mxpa_scheduler_init() {
}
#endif

Binary file not shown.