mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
spmv
This commit is contained in:
parent
6ecb4e5d28
commit
ea53554215
31 changed files with 10878 additions and 1 deletions
|
@ -33,7 +33,7 @@ QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LI
|
|||
|
||||
PROJECT = sgemm
|
||||
|
||||
SRCS = main.cc
|
||||
SRCS = main.cc
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
|
|
2610
benchmarks/opencl/spmv/1138_bus.mtx
Executable file
2610
benchmarks/opencl/spmv/1138_bus.mtx
Executable file
File diff suppressed because it is too large
Load diff
3
benchmarks/opencl/spmv/DESCRIPTION
Executable file
3
benchmarks/opencl/spmv/DESCRIPTION
Executable file
|
@ -0,0 +1,3 @@
|
|||
Inputs: 1138_bus.mtx vector.bin
|
||||
|
||||
This is a very small phantom data set.
|
72
benchmarks/opencl/spmv/Makefile
Normal file
72
benchmarks/opencl/spmv/Makefile
Normal file
|
@ -0,0 +1,72 @@
|
|||
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
|
||||
POCL_INC_PATH = $(wildcard ../include)
|
||||
POCL_LIB_PATH = $(wildcard ../lib)
|
||||
VX_RT_PATH = $(wildcard ../../../runtime)
|
||||
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
|
||||
|
||||
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
||||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
|
||||
|
||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
|
||||
|
||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
|
||||
|
||||
PROJECT = spmv
|
||||
|
||||
SRCS = main.cc parboil_opencl.c args.c gpu_info.c file.c convert_dataset.c mmio.c ocl.c
|
||||
#stub.cc
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
|
||||
#parboil_opencl.o : parboil_opencl.c
|
||||
# $(CC) $(CXXFLAGS) -c $^ -o $@
|
||||
|
||||
lib$(PROJECT).a: kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
|
||||
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||
|
||||
$(PROJECT).hex: $(PROJECT).elf
|
||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).elf
|
||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||
|
||||
run: $(PROJECT).hex
|
||||
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
|
||||
|
||||
qemu: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-s: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-c: $(PROJECT).qemu
|
||||
$(GDB) $(PROJECT).qemu
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
617
benchmarks/opencl/spmv/args.c
Normal file
617
benchmarks/opencl/spmv/args.c
Normal file
|
@ -0,0 +1,617 @@
|
|||
|
||||
#include <parboil.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Memory management routines */
|
||||
|
||||
/* Free an array of owned strings. */
|
||||
void
|
||||
pb_FreeStringArray(char **string_array)
|
||||
{
|
||||
char **p;
|
||||
|
||||
if (!string_array) return;
|
||||
for (p = string_array; *p; p++) free(*p);
|
||||
free(string_array);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version)
|
||||
{
|
||||
if (name == NULL) {
|
||||
fprintf(stderr, "pb_PlatformParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
struct pb_PlatformParam *ret =
|
||||
(struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam));
|
||||
|
||||
ret->name = name;
|
||||
ret->version = version;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
free(p->name);
|
||||
free(p->version);
|
||||
free(p);
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_INDEX;
|
||||
ret->index = index;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_CPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_GPU;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_ACCELERATOR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name)
|
||||
{
|
||||
struct pb_DeviceParam *ret =
|
||||
(struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam));
|
||||
ret->criterion = pb_Device_NAME;
|
||||
ret->name = name;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *p)
|
||||
{
|
||||
if (p == NULL) return;
|
||||
|
||||
switch(p->criterion) {
|
||||
case pb_Device_NAME:
|
||||
free(p->name);
|
||||
break;
|
||||
case pb_Device_INDEX:
|
||||
case pb_Device_CPU:
|
||||
case pb_Device_ACCELERATOR:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p)
|
||||
{
|
||||
free(p->outFile);
|
||||
pb_FreeStringArray(p->inpFiles);
|
||||
pb_FreePlatformParam(p->platform);
|
||||
pb_FreeDeviceParam(p->device);
|
||||
free(p);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Parse a comma-delimited list of strings into an
|
||||
* array of strings. */
|
||||
static char **
|
||||
read_string_array(char *in)
|
||||
{
|
||||
char **ret;
|
||||
int i;
|
||||
int count; /* Number of items in the input */
|
||||
char *substring; /* Current substring within 'in' */
|
||||
|
||||
/* Count the number of items in the string */
|
||||
count = 1;
|
||||
for (i = 0; in[i]; i++) if (in[i] == ',') count++;
|
||||
|
||||
/* Allocate storage */
|
||||
ret = (char **)malloc((count + 1) * sizeof(char *));
|
||||
|
||||
/* Create copies of the strings from the list */
|
||||
substring = in;
|
||||
for (i = 0; i < count; i++) {
|
||||
char *substring_end;
|
||||
int substring_length;
|
||||
|
||||
/* Find length of substring */
|
||||
for (substring_end = substring;
|
||||
(*substring_end != ',') && (*substring_end != 0);
|
||||
substring_end++);
|
||||
|
||||
substring_length = substring_end - substring;
|
||||
|
||||
/* Allocate memory and copy the substring */
|
||||
ret[i] = (char *)malloc(substring_length + 1);
|
||||
memcpy(ret[i], substring, substring_length);
|
||||
ret[i][substring_length] = 0;
|
||||
|
||||
/* go to next substring */
|
||||
substring = substring_end + 1;
|
||||
}
|
||||
ret[i] = NULL; /* Write the sentinel value */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
report_parse_error(const char *str)
|
||||
{
|
||||
fputs(str, stderr);
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_DeviceParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_DeviceParam *
|
||||
read_device_param(char *str)
|
||||
{
|
||||
/* Try different ways of interpreting 'device_string' until one works */
|
||||
|
||||
/* If argument is an integer, then interpret it as a device index */
|
||||
errno = 0;
|
||||
char *end;
|
||||
long device_int = strtol(str, &end, 10);
|
||||
if (!errno) {
|
||||
/* Negative numbers are not valid */
|
||||
if (device_int < 0 || device_int > INT_MAX) return NULL;
|
||||
|
||||
return pb_DeviceParam_index(device_int);
|
||||
}
|
||||
|
||||
/* Match against predefined strings */
|
||||
if (strcmp(str, "CPU") == 0)
|
||||
return pb_DeviceParam_cpu();
|
||||
if (strcmp(str, "GPU") == 0)
|
||||
return pb_DeviceParam_gpu();
|
||||
if (strcmp(str, "ACCELERATOR") == 0)
|
||||
return pb_DeviceParam_accelerator();
|
||||
|
||||
/* Assume any other string is a device name */
|
||||
return pb_DeviceParam_name(strdup(str));
|
||||
}
|
||||
|
||||
/* Interpret a string as a 'pb_PlatformParam' value.
|
||||
* Return a pointer to a new value, or NULL on failure.
|
||||
*/
|
||||
static struct pb_PlatformParam *
|
||||
read_platform_param(char *str)
|
||||
{
|
||||
int separator_index; /* Index of the '-' character separating
|
||||
* name and version number. It's -1 if
|
||||
* there's no '-' character. */
|
||||
|
||||
/* Find the last occurrence of '-' in 'str' */
|
||||
{
|
||||
char *cur;
|
||||
separator_index = -1;
|
||||
for (cur = str; *cur; cur++) {
|
||||
if (*cur == '-') separator_index = cur - str;
|
||||
}
|
||||
}
|
||||
|
||||
/* The platform name is either the entire string, or all characters before
|
||||
* the separator */
|
||||
int name_length = separator_index == -1 ? strlen(str) : separator_index;
|
||||
char *name_str = (char *)malloc(name_length + 1);
|
||||
memcpy(name_str, str, name_length);
|
||||
name_str[name_length] = 0;
|
||||
|
||||
/* The version is either NULL, or all characters after the separator */
|
||||
char *version_str;
|
||||
if (separator_index == -1) {
|
||||
version_str = NULL;
|
||||
}
|
||||
else {
|
||||
const char *version_input_str = str + separator_index + 1;
|
||||
int version_length = strlen(version_input_str);
|
||||
|
||||
version_str = (char *)malloc(version_length + 1);
|
||||
memcpy(version_str, version_input_str, version_length);
|
||||
version_str[version_length] = 0;
|
||||
}
|
||||
|
||||
/* Create output structure */
|
||||
return pb_PlatformParam(name_str, version_str);
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
/* Argument parsing state */
|
||||
|
||||
/* Argument parsing state.
|
||||
*
|
||||
* Arguments that are interpreted by the argument parser are removed from
|
||||
* the list. Variables 'argc' and 'argn' do not count arguments that have
|
||||
* been removed.
|
||||
*
|
||||
* During argument parsing, the array of arguments is compacted, overwriting
|
||||
* the erased arguments. Variable 'argv_put' points to the array element
|
||||
* where the next argument will be written. Variable 'argv_get' points to
|
||||
* the array element where the next argument will be read from.
|
||||
*/
|
||||
struct argparse {
|
||||
int argc; /* Number of arguments. Mutable. */
|
||||
int argn; /* Current argument index. */
|
||||
char **argv_get; /* Argument value being read. */
|
||||
char **argv_put; /* Argument value being written.
|
||||
* argv_put <= argv_get. */
|
||||
};
|
||||
|
||||
static void
|
||||
initialize_argparse(struct argparse *ap, int argc, char **argv)
|
||||
{
|
||||
ap->argc = argc;
|
||||
ap->argn = 0;
|
||||
ap->argv_get = ap->argv_put = argv;
|
||||
}
|
||||
|
||||
/* Finish argument parsing, without processing the remaining arguments.
|
||||
* Write new argument count into _argc. */
|
||||
static void
|
||||
finalize_argparse(struct argparse *ap, int *_argc, char **argv)
|
||||
{
|
||||
/* Move the remaining arguments */
|
||||
for(; ap->argn < ap->argc; ap->argn++)
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
|
||||
/* Update the argument count */
|
||||
*_argc = ap->argc;
|
||||
|
||||
/* Insert a terminating NULL */
|
||||
argv[ap->argc] = NULL;
|
||||
}
|
||||
|
||||
/* Delete the current argument. The argument will not be visible
|
||||
* when argument parsing is done. */
|
||||
static void
|
||||
delete_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "delete_argument\n");
|
||||
}
|
||||
ap->argc--;
|
||||
ap->argv_get++;
|
||||
}
|
||||
|
||||
/* Go to the next argument. Also, move the current argument to its
|
||||
* final location in argv. */
|
||||
static void
|
||||
next_argument(struct argparse *ap)
|
||||
{
|
||||
if (ap->argn >= ap->argc) {
|
||||
fprintf(stderr, "next_argument\n");
|
||||
}
|
||||
/* Move argument to its new location. */
|
||||
*ap->argv_put++ = *ap->argv_get++;
|
||||
ap->argn++;
|
||||
}
|
||||
|
||||
static int
|
||||
is_end_of_arguments(struct argparse *ap)
|
||||
{
|
||||
return ap->argn == ap->argc;
|
||||
}
|
||||
|
||||
/* Get the current argument */
|
||||
static char *
|
||||
get_argument(struct argparse *ap)
|
||||
{
|
||||
return *ap->argv_get;
|
||||
}
|
||||
|
||||
/* Get the current argument, and also delete it */
|
||||
static char *
|
||||
consume_argument(struct argparse *ap)
|
||||
{
|
||||
char *ret = get_argument(ap);
|
||||
delete_argument(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* The result of parsing a command-line argument */
|
||||
typedef enum {
|
||||
ARGPARSE_OK, /* Success */
|
||||
ARGPARSE_ERROR, /* Error */
|
||||
ARGPARSE_DONE /* Success, and do not continue parsing */
|
||||
} result;
|
||||
|
||||
typedef result parse_action(struct argparse *ap, struct pb_Parameters *params);
|
||||
|
||||
|
||||
/* A command-line option */
|
||||
struct option {
|
||||
char short_name; /* If not 0, the one-character
|
||||
* name of this option */
|
||||
const char *long_name; /* If not NULL, the long name of this option */
|
||||
parse_action *action; /* What to do when this option occurs.
|
||||
* Sentinel value is NULL.
|
||||
*/
|
||||
};
|
||||
|
||||
/* Output file
|
||||
*
|
||||
* -o FILE
|
||||
*/
|
||||
static result
|
||||
parse_output_file(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-o'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the output file name */
|
||||
free(params->outFile);
|
||||
params->outFile = strdup(consume_argument(ap));
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* Input files
|
||||
*
|
||||
* -i FILE,FILE,...
|
||||
*/
|
||||
static result
|
||||
parse_input_files(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting file name after '-i'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Replace the input file list */
|
||||
pb_FreeStringArray(params->inpFiles);
|
||||
params->inpFiles = read_string_array(consume_argument(ap));
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
/* End of options
|
||||
*
|
||||
* --
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_end_options(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
return ARGPARSE_DONE;
|
||||
}
|
||||
|
||||
/* OpenCL device
|
||||
*
|
||||
* --device X
|
||||
*/
|
||||
|
||||
static result
|
||||
parse_device(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a device */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--device'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *device_string = consume_argument(ap);
|
||||
struct pb_DeviceParam *device_param = read_device_param(device_string);
|
||||
|
||||
if (!device_param) {
|
||||
report_parse_error("Unrecognized device specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreeDeviceParam(params->device);
|
||||
params->device = device_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
static result
|
||||
parse_platform(struct argparse *ap, struct pb_Parameters *params)
|
||||
{
|
||||
/* Read the next argument, which specifies a platform */
|
||||
|
||||
if (is_end_of_arguments(ap))
|
||||
{
|
||||
report_parse_error("Expecting device specification after '--platform'\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
char *platform_string = consume_argument(ap);
|
||||
struct pb_PlatformParam *platform_param = read_platform_param(platform_string);
|
||||
|
||||
if (!platform_param) {
|
||||
report_parse_error("Unrecognized platform specification format on command line\n");
|
||||
return ARGPARSE_ERROR;
|
||||
}
|
||||
|
||||
/* Save the result */
|
||||
pb_FreePlatformParam(params->platform);
|
||||
params->platform = platform_param;
|
||||
|
||||
return ARGPARSE_OK;
|
||||
}
|
||||
|
||||
|
||||
static struct option options[] = {
|
||||
{ 'o', NULL, &parse_output_file },
|
||||
{ 'i', NULL, &parse_input_files },
|
||||
{ '-', NULL, &parse_end_options },
|
||||
{ 0, "device", &parse_device },
|
||||
{ 0, "platform", &parse_platform },
|
||||
{ 0, NULL, NULL }
|
||||
};
|
||||
|
||||
static int
|
||||
is_last_option(struct option *op)
|
||||
{
|
||||
return op->action == NULL;
|
||||
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
/* Parse command-line parameters.
|
||||
* Return zero on error, nonzero otherwise.
|
||||
* On error, the other outputs may be invalid.
|
||||
*
|
||||
* The information collected from parameters is used to update
|
||||
* 'ret'. 'ret' should be initialized.
|
||||
*
|
||||
* '_argc' and 'argv' are updated to contain only the unprocessed arguments.
|
||||
*/
|
||||
static int
|
||||
pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv)
|
||||
{
|
||||
char *err_message;
|
||||
struct argparse ap;
|
||||
|
||||
/* Each argument */
|
||||
initialize_argparse(&ap, *_argc, argv);
|
||||
while(!is_end_of_arguments(&ap)) {
|
||||
result arg_result; /* Result of parsing this option */
|
||||
char *arg = get_argument(&ap);
|
||||
|
||||
/* Process this argument */
|
||||
if (arg[0] == '-') {
|
||||
/* Single-character flag */
|
||||
if ((arg[1] != 0) && (arg[2] == 0)) {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching short option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->short_name == arg[1]) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
/* Long flag */
|
||||
if (arg[1] == '-') {
|
||||
delete_argument(&ap); /* This argument is consumed here */
|
||||
|
||||
/* Find a matching long option */
|
||||
struct option *op;
|
||||
for (op = options; !is_last_option(op); op++) {
|
||||
if (op->long_name && strcmp(&arg[2], op->long_name) == 0) {
|
||||
arg_result = (*op->action)(&ap, ret);
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
|
||||
/* No option matches */
|
||||
report_parse_error("Unexpected command-line parameter\n");
|
||||
arg_result = ARGPARSE_ERROR;
|
||||
goto option_was_processed;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Other arguments are ignored */
|
||||
next_argument(&ap);
|
||||
arg_result = ARGPARSE_OK;
|
||||
goto option_was_processed;
|
||||
}
|
||||
|
||||
option_was_processed:
|
||||
/* Decide what to do next based on 'arg_result' */
|
||||
switch(arg_result) {
|
||||
case ARGPARSE_OK:
|
||||
/* Continue processing */
|
||||
break;
|
||||
|
||||
case ARGPARSE_ERROR:
|
||||
/* Error exit from the function */
|
||||
return 0;
|
||||
|
||||
case ARGPARSE_DONE:
|
||||
/* Normal exit from the argument parsing loop */
|
||||
goto end_of_options;
|
||||
}
|
||||
} /* end for each argument */
|
||||
|
||||
/* If all arguments were processed, then normal exit from the loop */
|
||||
|
||||
end_of_options:
|
||||
finalize_argparse(&ap, _argc, argv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Other exported functions */
|
||||
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv)
|
||||
{
|
||||
struct pb_Parameters *ret =
|
||||
(struct pb_Parameters *)malloc(sizeof(struct pb_Parameters));
|
||||
|
||||
/* Initialize the parameters structure */
|
||||
ret->outFile = NULL;
|
||||
ret->inpFiles = (char **)malloc(sizeof(char *));
|
||||
ret->inpFiles[0] = NULL;
|
||||
ret->platform = NULL;
|
||||
ret->device = NULL;
|
||||
|
||||
/* Read parameters and update _argc, argv */
|
||||
if (!pb_ParseParameters(ret, _argc, argv)) {
|
||||
/* Parse error */
|
||||
pb_FreeParameters(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p)
|
||||
{
|
||||
int n;
|
||||
|
||||
for (n = 0; p->inpFiles[n]; n++);
|
||||
return n;
|
||||
}
|
||||
|
356
benchmarks/opencl/spmv/convert_dataset.c
Normal file
356
benchmarks/opencl/spmv/convert_dataset.c
Normal file
|
@ -0,0 +1,356 @@
|
|||
/*
|
||||
* NOTES:
|
||||
*
|
||||
* 1) Matrix Market files are always 1-based, i.e. the index of the first
|
||||
* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE
|
||||
* OFFSETS ACCORDINGLY when reading and writing
|
||||
* to files.
|
||||
*
|
||||
* 2) ANSI C requires one to use the "l" format modifier when reading
|
||||
* double precision floating point numbers in scanf() and
|
||||
* its variants. For example, use "%lf", "%lg", or "%le"
|
||||
* when reading doubles, otherwise errors will occur.
|
||||
*/
|
||||
|
||||
#include "convert_dataset.h"
|
||||
#include "mmio.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct _mat_entry {
|
||||
int row, col; /* i,j */
|
||||
float val;
|
||||
} mat_entry;
|
||||
|
||||
typedef struct _row_stats { // stats on each row
|
||||
int index;
|
||||
int size;
|
||||
int start;
|
||||
int padding;
|
||||
} row_stats;
|
||||
|
||||
int sort_rows(const void *a, const void *b) {
|
||||
return (((mat_entry *)a)->row - ((mat_entry *)b)->row);
|
||||
}
|
||||
int sort_cols(const void *a, const void *b) {
|
||||
return (((mat_entry *)a)->col - ((mat_entry *)b)->col);
|
||||
}
|
||||
/* sorts largest by size first */
|
||||
int sort_stats(const void *a, const void *b) {
|
||||
return (((row_stats *)b)->size - ((row_stats *)a)->size);
|
||||
}
|
||||
|
||||
/*
|
||||
* COO to JDS matrix conversion.
|
||||
*
|
||||
* Needs to output both column and row major JDS formats
|
||||
* with the minor unit padded to a multiple of `pad_minor`
|
||||
* and the major unit arranged into groups of `group_size`
|
||||
*
|
||||
* Major unit is col, minor is row. Each block is either a scalar or vec4
|
||||
*
|
||||
* Inputs:
|
||||
* mtx_filename - the file in COO format
|
||||
* pad_rows - multiple of packed groups to pad each row to
|
||||
* warp_size - each group of `warp_size` cols is padded to the same amount
|
||||
* pack_size - number of items to pack
|
||||
* mirrored - is the input mtx file a symmetric matrix? The other half will be
|
||||
* filled in if this is =1
|
||||
* binary - does the sparse matrix file have values in the format "%d %d"
|
||||
* or "%d %d %lg"?
|
||||
* debug_level - 0 for no output, 1 for simple JDS data, 2 for visual grid
|
||||
* Outputs:
|
||||
* data - the raw data, padded and grouped as requested
|
||||
* data_row_ptr - pointer offset into the `data` output, referenced
|
||||
* by the current row loop index
|
||||
* nz_count - number of non-zero entries in each row
|
||||
* indexed by col / warp_size
|
||||
* data_col_index - corresponds to the col that the same
|
||||
* array index in `data` is at
|
||||
* data_row_map - JDS row to real row
|
||||
* data_cols - number of columns the output JDS matrix has
|
||||
* dim - dimensions of the input matrix
|
||||
* data_ptr_len - size of data_row_ptr (maps to original `depth` var)
|
||||
*/
|
||||
int coo_to_jds(char *mtx_filename, int pad_rows, int warp_size, int pack_size,
|
||||
int mirrored, int binary, int debug_level, float **data,
|
||||
int **data_row_ptr, int **nz_count, int **data_col_index,
|
||||
int **data_row_map, int *data_cols, int *dim, int *len,
|
||||
int *nz_count_len, int *data_ptr_len) {
|
||||
int ret_code;
|
||||
MM_typecode matcode;
|
||||
FILE *f;
|
||||
int nz;
|
||||
int i;
|
||||
float *val;
|
||||
mat_entry *entries;
|
||||
row_stats *stats;
|
||||
int rows, cols;
|
||||
|
||||
if ((f = fopen(mtx_filename, "r")) == NULL)
|
||||
exit(1);
|
||||
|
||||
printf("OK**\n");
|
||||
|
||||
if (mm_read_banner(f, &matcode) != 0) {
|
||||
printf("Could not process Matrix Market banner.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("OK**\n");
|
||||
|
||||
/* This is how one can screen matrix types if their application */
|
||||
/* only supports a subset of the Matrix Market data types. */
|
||||
|
||||
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
|
||||
mm_is_sparse(matcode)) {
|
||||
printf("Sorry, this application does not support ");
|
||||
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* find out size of sparse matrix .... */
|
||||
|
||||
if ((ret_code = mm_read_mtx_crd_size(f, &rows, &cols, &nz)) != 0)
|
||||
exit(1);
|
||||
*dim = rows;
|
||||
|
||||
if (mirrored) {
|
||||
// max possible size, might be less because diagonal values aren't doubled
|
||||
entries = (mat_entry *)malloc(2 * nz * sizeof(mat_entry));
|
||||
} else {
|
||||
entries = (mat_entry *)malloc(nz * sizeof(mat_entry));
|
||||
}
|
||||
|
||||
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
|
||||
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
|
||||
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
|
||||
int cur_i = 0; // to account for mirrored diagonal entries
|
||||
|
||||
for (i = 0; i < nz; i++, cur_i++) {
|
||||
if (!binary) {
|
||||
fscanf(f, "%d %d %f\n", &entries[cur_i].row, &entries[cur_i].col,
|
||||
&entries[cur_i].val);
|
||||
} else {
|
||||
fscanf(f, "%d %d\n", &entries[cur_i].row, &entries[cur_i].col);
|
||||
entries[cur_i].val = 1.0;
|
||||
}
|
||||
entries[cur_i].row--;
|
||||
entries[cur_i].col--;
|
||||
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
|
||||
// entries[cur_i].val);
|
||||
if (mirrored) {
|
||||
// fill in mirrored diagonal
|
||||
if (entries[cur_i].row != entries[cur_i].col) { // not a diagonal value
|
||||
cur_i++;
|
||||
entries[cur_i].val = entries[cur_i - 1].val;
|
||||
entries[cur_i].col = entries[cur_i - 1].row;
|
||||
entries[cur_i].row = entries[cur_i - 1].col;
|
||||
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
|
||||
// entries[cur_i].val);
|
||||
}
|
||||
}
|
||||
}
|
||||
// set new non-zero count
|
||||
nz = cur_i;
|
||||
if (debug_level >= 1) {
|
||||
printf("Converting COO to JDS format (%dx%d)\n%d matrix entries, warp size "
|
||||
"= %d, "
|
||||
"row padding align = %d, pack size = %d\n\n",
|
||||
rows, cols, nz, warp_size, pad_rows, pack_size);
|
||||
}
|
||||
if (f != stdin)
|
||||
fclose(f);
|
||||
|
||||
/*
|
||||
* Now we have an array of values in entries
|
||||
* Transform to padded JDS format - sort by rows, then fubini
|
||||
*/
|
||||
|
||||
int irow, icol = 0, istart = 0;
|
||||
int total_size = 0;
|
||||
|
||||
/* Loop through each entry to figure out padding, grouping that determine
|
||||
* final data array size
|
||||
*
|
||||
* First calculate stats for each row
|
||||
*
|
||||
* Collect stats using the major_stats typedef
|
||||
*/
|
||||
|
||||
qsort(entries, nz, sizeof(mat_entry), sort_rows); // sort by row number
|
||||
rows = entries[nz - 1].row + 1; // last item is greatest row (zero indexed)
|
||||
if (rows % warp_size) { // pad group number to warp_size here
|
||||
rows += warp_size - rows % warp_size;
|
||||
}
|
||||
stats = (row_stats *)calloc(rows, sizeof(row_stats)); // set to 0
|
||||
*data_row_map = (int *)calloc(rows, sizeof(int));
|
||||
irow = entries[0].row; // set first row
|
||||
|
||||
// printf("First row %d\n", irow);
|
||||
for (i = 0; i < nz; i++) { // loop through each sorted entry
|
||||
if (entries[i].row != irow || i == nz - 1) { // new row
|
||||
// printf("%d != %d\n", entries[i].row, irow);
|
||||
if (i == nz - 1) {
|
||||
// last item, add it to current row
|
||||
// printf("Last item i=%d, row=%d, irow=%d\n", i, entries[i].row, irow);
|
||||
icol++;
|
||||
}
|
||||
// hit a new row, record stats for the last row (i-1)
|
||||
stats[irow].size = icol; // record # cols in previous row
|
||||
stats[irow].index = entries[i - 1].row; // row # for previous stat item
|
||||
// printf("Row %d, i=%d, irow=%d\n", entries[i].row, i, irow);
|
||||
stats[irow].start = istart; // starting location in entries array
|
||||
// set stats for the next row until this break again
|
||||
icol = 0; // reset row items
|
||||
irow = entries[i].row;
|
||||
istart = i;
|
||||
}
|
||||
icol++; // keep track of number of items in this row
|
||||
}
|
||||
|
||||
*nz_count_len = rows / warp_size + rows % warp_size;
|
||||
*nz_count =
|
||||
(int *)malloc(*nz_count_len * sizeof(int)); // only one value per group
|
||||
|
||||
/* sort based upon row size, greatest first */
|
||||
qsort(stats, rows, sizeof(row_stats), sort_stats);
|
||||
/* figure out padding and grouping */
|
||||
if (debug_level >= 1) {
|
||||
printf("Padding data....%d rows, %d groups\n", rows, *nz_count_len);
|
||||
}
|
||||
int pad_to, total_padding = 0, pack_to;
|
||||
pad_rows *= pack_size; // change padding to account for packed items
|
||||
for (i = 0; i < rows; i++) {
|
||||
// record JDS to real row number
|
||||
(*data_row_map)[i] = stats[i].index;
|
||||
if (i < rows - 1) {
|
||||
// (*data_row_map)[i]--; // ???? no idea why this is off by 1
|
||||
}
|
||||
// each row is padded so the number of packed groups % pad_rows == 0
|
||||
if (i % warp_size ==
|
||||
0) { // on a group boundary with the largest number of items
|
||||
// find padding in individual items
|
||||
if (stats[i].size % pad_rows) {
|
||||
stats[i].padding =
|
||||
pad_rows - (stats[i].size % pad_rows); // find padding
|
||||
} else {
|
||||
stats[i].padding = 0; // no padding necessary, already at pad multiple
|
||||
}
|
||||
if (stats[i].size % pack_size) {
|
||||
pack_to = ceil((float)stats[i].size / pack_size);
|
||||
} else {
|
||||
pack_to = stats[i].size / pack_size;
|
||||
}
|
||||
// pack_to = stats[i].size + (!stats[i].size%pack_size) ? 0 : (pack_size -
|
||||
// stats[i].size%pack_size);
|
||||
pad_to = stats[i].size +
|
||||
stats[i].padding; // total size of this row, with padding
|
||||
// TODO: change this to reflect the real number of nonzero packed items,
|
||||
// not the padded
|
||||
// value
|
||||
(*nz_count)[i / warp_size] =
|
||||
pack_to; // number of packed items in this group
|
||||
total_size += pad_to * warp_size; // allocate size for this padded group
|
||||
if (debug_level >= 2)
|
||||
printf("Padding warp group %d to %d items, zn = %d\n", i / warp_size,
|
||||
pad_to, pack_to);
|
||||
} else {
|
||||
stats[i].padding = pad_to - stats[i].size;
|
||||
}
|
||||
total_padding += stats[i].padding;
|
||||
// if (debug_level >= 2)
|
||||
// printf("Row %d, %d items, %d padding\n", stats[i].index,
|
||||
// stats[i].size, stats[i].padding);
|
||||
}
|
||||
|
||||
/* allocate data and data_row_index */
|
||||
if (debug_level >= 1)
|
||||
printf("Allocating data space: %d entries (%f%% padding)\n", total_size,
|
||||
(float)100 * total_padding / total_size);
|
||||
*data = (float *)calloc(total_size,
|
||||
sizeof(float)); // set to 0 so padded values are set
|
||||
*data_col_index =
|
||||
(int *)calloc(total_size, sizeof(int)); // any unset indexes point to 0
|
||||
*data_row_ptr = (int *)calloc(rows, sizeof(int));
|
||||
*len = total_size;
|
||||
i = 0; // data index, including padding
|
||||
|
||||
/*
|
||||
* Keep looping through each row, writing data a group at a time
|
||||
* to the output array. Increment `irow` each time, and use it as
|
||||
* an index into entries along with stats.start to get the next
|
||||
* data item
|
||||
*/
|
||||
irow = 0; // keep track of which row we are in inside the fubini-ed array
|
||||
int idata = 0; // position within final data array
|
||||
int entry_index, j;
|
||||
int ipack; // used in internal loop for writing packed values
|
||||
mat_entry entry;
|
||||
while (1) {
|
||||
/* record data_row_ptr */
|
||||
(*data_row_ptr)[irow] = idata;
|
||||
|
||||
/* End condtion: the size of the greatest row is smaller than the current
|
||||
Fubini-ed row */
|
||||
if (stats[0].size + stats[0].padding <= irow * pack_size)
|
||||
break;
|
||||
|
||||
// printf("Data row pointer for row %d is %d\n", irow, idata);
|
||||
for (i = 0; i < rows; i++) {
|
||||
/* take one packed group from each original row */
|
||||
// printf("Output irow %d icol %d (real %d,%d size %d)\n", irow, i,
|
||||
// entry.col, i, stats[i].size);
|
||||
/* Watch out for little vs big endian, and how opencl interprets vector
|
||||
* casting from pointers */
|
||||
for (ipack = 0; ipack < pack_size; ipack++) {
|
||||
if (stats[i].size > irow * pack_size + ipack) {
|
||||
// copy value
|
||||
entry_index = stats[i].start + irow * pack_size + ipack;
|
||||
entry = entries[entry_index];
|
||||
/* record index and value */
|
||||
(*data)[idata] = entry.val;
|
||||
/* each data item will get its row index from the thread, col from
|
||||
* here */
|
||||
(*data_col_index)[idata] = entry.col;
|
||||
|
||||
if (debug_level >= 2) {
|
||||
if (i < 3) {
|
||||
// first row debugging
|
||||
printf("[%d row%d=%.3f]", ipack + 1, i, entry.val);
|
||||
} else {
|
||||
printf("%d", ipack + 1);
|
||||
}
|
||||
}
|
||||
} else if (stats[i].size + stats[i].padding >
|
||||
irow * pack_size + ipack) {
|
||||
/* add padding to the end of each row here - this assumes padding is
|
||||
* factored into allocated size */
|
||||
if (debug_level >= 2)
|
||||
printf("0");
|
||||
(*data_col_index)[idata] = -1;
|
||||
} else {
|
||||
goto endwrite; // no data written this pass, so don't increment idata
|
||||
}
|
||||
idata += 1;
|
||||
}
|
||||
}
|
||||
endwrite:
|
||||
if (debug_level >= 2) {
|
||||
printf("\n");
|
||||
}
|
||||
irow += 1;
|
||||
}
|
||||
|
||||
if (debug_level >= 1)
|
||||
printf("Finished converting.\nJDS format has %d columns, %d rows.\n", rows,
|
||||
irow);
|
||||
free(entries);
|
||||
free(stats);
|
||||
printf("nz_count_len = %d\n", *nz_count_len);
|
||||
|
||||
*data_cols = rows;
|
||||
*data_ptr_len = irow + 1;
|
||||
return 0;
|
||||
}
|
17
benchmarks/opencl/spmv/convert_dataset.h
Normal file
17
benchmarks/opencl/spmv/convert_dataset.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
#ifndef _CONVERT_DATASET_H
|
||||
#define _CONVERT_DATASET_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int coo_to_jds(char* mtx_filename, int pad_rows, int warp_size, int pack_size,
|
||||
int mirrored, int binary, int debug_level,
|
||||
float** data, int** data_row_ptr, int** nz_count, int** data_col_index,
|
||||
int** data_row_map, int* data_cols, int* dim, int* len, int* nz_count_len,
|
||||
int* data_ptr_len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
78
benchmarks/opencl/spmv/file.c
Normal file
78
benchmarks/opencl/spmv/file.c
Normal file
|
@ -0,0 +1,78 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
//#include <endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#if __BYTE_ORDER != __LITTLE_ENDIAN
|
||||
# error "File I/O is not implemented for this system: wrong endianness."
|
||||
#endif
|
||||
|
||||
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
|
||||
float** h_data, int** h_indices, int** h_ptr,
|
||||
int** h_perm, int** h_nzcnt)
|
||||
{
|
||||
FILE* fid = fopen(fName, "rb");
|
||||
|
||||
if (fid == NULL)
|
||||
{
|
||||
fprintf(stderr, "Cannot open input file\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
fscanf(fid, "%d %d %d %d %d\n",len,depth,nzcnt_len,dim,pad);
|
||||
int _len=len[0];
|
||||
int _depth=depth[0];
|
||||
int _dim=dim[0];
|
||||
int _pad=pad[0];
|
||||
int _nzcnt_len=nzcnt_len[0];
|
||||
|
||||
*h_data = (float *) malloc(_len * sizeof (float));
|
||||
fread (*h_data, sizeof (float), _len, fid);
|
||||
|
||||
*h_indices = (int *) malloc(_len * sizeof (int));
|
||||
fread (*h_indices, sizeof (int), _len, fid);
|
||||
|
||||
*h_ptr = (int *) malloc(_depth * sizeof (int));
|
||||
fread (*h_ptr, sizeof (int), _depth, fid);
|
||||
|
||||
*h_perm = (int *) malloc(_dim * sizeof (int));
|
||||
fread (*h_perm, sizeof (int), _dim, fid);
|
||||
|
||||
*h_nzcnt = (int *) malloc(_nzcnt_len * sizeof (int));
|
||||
fread (*h_nzcnt, sizeof (int), _nzcnt_len, fid);
|
||||
|
||||
fclose (fid);
|
||||
}
|
||||
|
||||
void input_vec(char *fName,float *h_vec,int dim)
|
||||
{
|
||||
FILE* fid = fopen(fName, "rb");
|
||||
fread (h_vec, sizeof (float), dim, fid);
|
||||
fclose(fid);
|
||||
|
||||
}
|
||||
|
||||
void outputData(char* fName, float *h_Ax_vector,int dim)
|
||||
{
|
||||
FILE* fid = fopen(fName, "w");
|
||||
uint32_t tmp32;
|
||||
if (fid == NULL)
|
||||
{
|
||||
fprintf(stderr, "Cannot open output file\n");
|
||||
exit(-1);
|
||||
}
|
||||
tmp32 = dim;
|
||||
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
|
||||
fwrite(h_Ax_vector, sizeof(float), dim, fid);
|
||||
|
||||
fclose (fid);
|
||||
}
|
18
benchmarks/opencl/spmv/file.h
Normal file
18
benchmarks/opencl/spmv/file.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2007 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
#ifndef __FILEH__
|
||||
#define __FILEH__
|
||||
|
||||
void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
|
||||
float** h_data, int** h_indices, int** h_ptr,
|
||||
int** h_perm, int** h_nzcnt);
|
||||
|
||||
void input_vec(char* fNanme, float *h_vec,int dim);
|
||||
void outputData(char* fName, float *h_Ax_vector,int dim);
|
||||
|
||||
#endif
|
55
benchmarks/opencl/spmv/gpu_info.c
Normal file
55
benchmarks/opencl/spmv/gpu_info.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
//#include <endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm)
|
||||
{
|
||||
int max_thread;
|
||||
int max_block=8;
|
||||
if(major==1)
|
||||
{
|
||||
if(minor>=2)
|
||||
max_thread=1024;
|
||||
else
|
||||
max_thread=768;
|
||||
}
|
||||
else if(major==2)
|
||||
max_thread=1536;
|
||||
else
|
||||
//newer GPU //keep using 2.0
|
||||
max_thread=1536;
|
||||
|
||||
int _grid;
|
||||
int _thread;
|
||||
|
||||
if(task*pad>sm*max_thread)
|
||||
{
|
||||
_thread=max_thread/max_block;
|
||||
_grid = ((task*pad+_thread-1)/_thread)*_thread;
|
||||
}
|
||||
else
|
||||
{
|
||||
_thread=pad;
|
||||
_grid=task*pad;
|
||||
}
|
||||
|
||||
thread[0]=_thread;
|
||||
grid[0]=_grid;
|
||||
}
|
20
benchmarks/opencl/spmv/gpu_info.h
Normal file
20
benchmarks/opencl/spmv/gpu_info.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __GPUINFOH__
|
||||
#define __GPUINFOH__
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
int pad,
|
||||
int major,
|
||||
int minor,
|
||||
int sm);
|
||||
|
||||
#endif
|
2610
benchmarks/opencl/spmv/input/1138_bus.mtx
Executable file
2610
benchmarks/opencl/spmv/input/1138_bus.mtx
Executable file
File diff suppressed because it is too large
Load diff
BIN
benchmarks/opencl/spmv/input/1138_bus.mtx.bin
Executable file
BIN
benchmarks/opencl/spmv/input/1138_bus.mtx.bin
Executable file
Binary file not shown.
3
benchmarks/opencl/spmv/input/DESCRIPTION
Executable file
3
benchmarks/opencl/spmv/input/DESCRIPTION
Executable file
|
@ -0,0 +1,3 @@
|
|||
Inputs: 1138_bus.mtx vector.bin
|
||||
|
||||
This is a very small phantom data set.
|
BIN
benchmarks/opencl/spmv/input/vector.bin
Normal file
BIN
benchmarks/opencl/spmv/input/vector.bin
Normal file
Binary file not shown.
36
benchmarks/opencl/spmv/kernel.cl
Normal file
36
benchmarks/opencl/spmv/kernel.cl
Normal file
|
@ -0,0 +1,36 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
__kernel void spmv_jds_naive(__global float *dst_vector, __global float *d_data,
|
||||
__global int *d_index, __global int *d_perm,
|
||||
__global float *x_vec, const int dim,
|
||||
__constant int *jds_ptr_int,
|
||||
__constant int *sh_zcnt_int)
|
||||
{
|
||||
int ix = get_global_id(0);
|
||||
|
||||
if (ix < dim) {
|
||||
float sum = 0.0f;
|
||||
// 32 is warp size
|
||||
int bound=sh_zcnt_int[ix/32];
|
||||
|
||||
for(int k=0;k<bound;k++)
|
||||
{
|
||||
int j = jds_ptr_int[k] + ix;
|
||||
int in = d_index[j];
|
||||
|
||||
float d = d_data[j];
|
||||
float t = x_vec[in];
|
||||
|
||||
sum += d*t;
|
||||
}
|
||||
|
||||
dst_vector[d_perm[ix]] = sum;
|
||||
}
|
||||
}
|
||||
|
BIN
benchmarks/opencl/spmv/libspmv.a
Normal file
BIN
benchmarks/opencl/spmv/libspmv.a
Normal file
Binary file not shown.
301
benchmarks/opencl/spmv/main.cc
Normal file
301
benchmarks/opencl/spmv/main.cc
Normal file
|
@ -0,0 +1,301 @@
|
|||
/***************************************************************************
|
||||
*cr
|
||||
*cr (C) Copyright 2010 The Board of Trustees of the
|
||||
*cr University of Illinois
|
||||
*cr All Rights Reserved
|
||||
*cr
|
||||
***************************************************************************/
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include <parboil.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "convert_dataset.h"
|
||||
#include "file.h"
|
||||
#include "gpu_info.h"
|
||||
#include "ocl.h"
|
||||
|
||||
|
||||
static int generate_vector(float *x_vector, int dim) {
|
||||
srand(54321);
|
||||
int i;
|
||||
for (i = 0; i < dim; i++) {
|
||||
x_vector[i] = (rand() / (float)RAND_MAX);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
struct pb_TimerSet timers;
|
||||
struct pb_Parameters *parameters;
|
||||
|
||||
printf("CUDA accelerated sparse matrix vector multiplication****\n");
|
||||
printf("Original version by Li-Wen Chang <lchang20@illinois.edu> and "
|
||||
"Shengzhao Wu<wu14@illinois.edu>\n");
|
||||
printf("This version maintained by Chris Rodrigues ***********\n");
|
||||
// parameters = pb_ReadParameters(&argc, argv);
|
||||
parameters->inpFiles = (char **)malloc(sizeof(char *) * 3);
|
||||
parameters->inpFiles[0] = (char *)malloc(100);
|
||||
parameters->inpFiles[1] = (char *)malloc(100);
|
||||
parameters->inpFiles[2] = NULL;
|
||||
strncpy(parameters->inpFiles[0], "1138_bus.mtx", 100);
|
||||
strncpy(parameters->inpFiles[1], "vector.bin", 100);
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
if ((parameters->inpFiles[0] == NULL) || (parameters->inpFiles[1] == NULL)) {
|
||||
fprintf(stderr, "Expecting one input filename\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
pb_InitializeTimerSet(&timers);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
// parameters declaration
|
||||
cl_int clStatus;
|
||||
pb_Context *pb_context;
|
||||
pb_context = pb_InitOpenCLContext(parameters);
|
||||
if (pb_context == NULL) {
|
||||
fprintf(stderr, "Error: No OpenCL platform/device can be found.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
cl_device_id clDevice = (cl_device_id)pb_context->clDeviceId;
|
||||
cl_platform_id clPlatform = (cl_platform_id)pb_context->clPlatformId;
|
||||
cl_context clContext = (cl_context)pb_context->clContext;
|
||||
cl_command_queue clCommandQueue = clCreateCommandQueue(
|
||||
clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &clStatus);
|
||||
CHECK_ERROR("clCreateCommandQueue")
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
pb_SetOpenCL(&clContext, &clCommandQueue);
|
||||
|
||||
//const char *clSource[] = {readFile("src/opencl_base/kernel.cl")};
|
||||
// cl_program clProgram =
|
||||
// clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
|
||||
cl_program clProgram = clCreateProgramWithBuiltInKernels(
|
||||
clContext, 1, &clDevice, "spmv_jds_naive", &clStatus);
|
||||
CHECK_ERROR("clCreateProgramWithSource")
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
char clOptions[50];
|
||||
sprintf(clOptions, "");
|
||||
clStatus = clBuildProgram(clProgram, 1, &clDevice, clOptions, NULL, NULL);
|
||||
CHECK_ERROR("clBuildProgram")
|
||||
|
||||
cl_kernel clKernel = clCreateKernel(clProgram, "spmv_jds_naive", &clStatus);
|
||||
CHECK_ERROR("clCreateKernel")
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
int len;
|
||||
int depth;
|
||||
int dim;
|
||||
int pad = 32;
|
||||
int nzcnt_len;
|
||||
|
||||
// host memory allocation
|
||||
// matrix
|
||||
float *h_data;
|
||||
int *h_indices;
|
||||
int *h_ptr;
|
||||
int *h_perm;
|
||||
int *h_nzcnt;
|
||||
// vector
|
||||
float *h_Ax_vector;
|
||||
float *h_x_vector;
|
||||
|
||||
// device memory allocation
|
||||
// matrix
|
||||
cl_mem d_data;
|
||||
cl_mem d_indices;
|
||||
cl_mem d_ptr;
|
||||
cl_mem d_perm;
|
||||
cl_mem d_nzcnt;
|
||||
|
||||
// vector
|
||||
cl_mem d_Ax_vector;
|
||||
cl_mem d_x_vector;
|
||||
|
||||
cl_mem jds_ptr_int;
|
||||
cl_mem sh_zcnt_int;
|
||||
|
||||
// load matrix from files
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
// inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad,
|
||||
// &h_data, &h_indices, &h_ptr,
|
||||
// &h_perm, &h_nzcnt);
|
||||
int col_count;
|
||||
printf("OK--\n");
|
||||
coo_to_jds(parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx
|
||||
1, // row padding
|
||||
pad, // warp size
|
||||
1, // pack size
|
||||
1, // is mirrored?
|
||||
0, // binary matrix
|
||||
1, // debug level [0:2]
|
||||
&h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm, &col_count, &dim,
|
||||
&len, &nzcnt_len, &depth);
|
||||
|
||||
printf("OK++\n");
|
||||
|
||||
// pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
h_Ax_vector = (float *)malloc(sizeof(float) * dim);
|
||||
h_x_vector = (float *)malloc(sizeof(float) * dim);
|
||||
|
||||
input_vec(parameters->inpFiles[1], h_x_vector, dim);
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
OpenCLDeviceProp clDeviceProp;
|
||||
// clStatus =
|
||||
//clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,sizeof(cl_uint),&(clDeviceProp.major),NULL);
|
||||
// CHECK_ERROR("clGetDeviceInfo")
|
||||
// clStatus =
|
||||
//clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,sizeof(cl_uint),&(clDeviceProp.minor),NULL);
|
||||
// CHECK_ERROR("clGetDeviceInfo")
|
||||
clStatus =
|
||||
clGetDeviceInfo(clDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint),
|
||||
&(clDeviceProp.multiProcessorCount), NULL);
|
||||
CHECK_ERROR("clGetDeviceInfo")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
// memory allocation
|
||||
d_data = clCreateBuffer(clContext, CL_MEM_READ_ONLY, len * sizeof(float),
|
||||
NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
d_indices = clCreateBuffer(clContext, CL_MEM_READ_ONLY, len * sizeof(int),
|
||||
NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
d_perm = clCreateBuffer(clContext, CL_MEM_READ_ONLY, dim * sizeof(int), NULL,
|
||||
&clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
d_x_vector = clCreateBuffer(clContext, CL_MEM_READ_ONLY, dim * sizeof(float),
|
||||
NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
d_Ax_vector = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY,
|
||||
dim * sizeof(float), NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
|
||||
jds_ptr_int = clCreateBuffer(clContext, CL_MEM_READ_ONLY, 5000 * sizeof(int),
|
||||
NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
sh_zcnt_int = clCreateBuffer(clContext, CL_MEM_READ_ONLY, 5000 * sizeof(int),
|
||||
NULL, &clStatus);
|
||||
CHECK_ERROR("clCreateBuffer")
|
||||
|
||||
clMemSet(clCommandQueue, d_Ax_vector, 0, dim * sizeof(float));
|
||||
|
||||
// memory copy
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_data, CL_FALSE, 0,
|
||||
len * sizeof(float), h_data, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_indices, CL_FALSE, 0,
|
||||
len * sizeof(int), h_indices, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_perm, CL_FALSE, 0,
|
||||
dim * sizeof(int), h_perm, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue, d_x_vector, CL_FALSE, 0,
|
||||
dim * sizeof(int), h_x_vector, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue, jds_ptr_int, CL_FALSE, 0,
|
||||
depth * sizeof(int), h_ptr, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
clStatus =
|
||||
clEnqueueWriteBuffer(clCommandQueue, sh_zcnt_int, CL_TRUE, 0,
|
||||
nzcnt_len * sizeof(int), h_nzcnt, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
size_t grid;
|
||||
size_t block;
|
||||
|
||||
compute_active_thread(&block, &grid, nzcnt_len, pad, clDeviceProp.major,
|
||||
clDeviceProp.minor, clDeviceProp.multiProcessorCount);
|
||||
// printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!grid is %d and block is
|
||||
// %d=\n",grid,block);
|
||||
// printf("!!! dim is %d\n",dim);
|
||||
|
||||
clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), &d_Ax_vector);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), &d_data);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), &d_indices);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), &d_perm);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), &d_x_vector);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 5, sizeof(int), &dim);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
clStatus = clSetKernelArg(clKernel, 6, sizeof(cl_mem), &jds_ptr_int);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
clStatus = clSetKernelArg(clKernel, 7, sizeof(cl_mem), &sh_zcnt_int);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
// main execution
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
|
||||
|
||||
int i;
|
||||
for (i = 0; i < 50; i++) {
|
||||
clStatus = clEnqueueNDRangeKernel(clCommandQueue, clKernel, 1, NULL, &grid,
|
||||
&block, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
}
|
||||
|
||||
clStatus = clFinish(clCommandQueue);
|
||||
CHECK_ERROR("clFinish")
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
|
||||
// HtoD memory copy
|
||||
clStatus =
|
||||
clEnqueueReadBuffer(clCommandQueue, d_Ax_vector, CL_TRUE, 0,
|
||||
dim * sizeof(float), h_Ax_vector, 0, NULL, NULL);
|
||||
CHECK_ERROR("clEnqueueReadBuffer")
|
||||
|
||||
clStatus = clReleaseKernel(clKernel);
|
||||
clStatus = clReleaseProgram(clProgram);
|
||||
|
||||
clStatus = clReleaseMemObject(d_data);
|
||||
clStatus = clReleaseMemObject(d_indices);
|
||||
clStatus = clReleaseMemObject(d_perm);
|
||||
clStatus = clReleaseMemObject(d_x_vector);
|
||||
clStatus = clReleaseMemObject(d_Ax_vector);
|
||||
CHECK_ERROR("clReleaseMemObject")
|
||||
|
||||
clStatus = clReleaseCommandQueue(clCommandQueue);
|
||||
clStatus = clReleaseContext(clContext);
|
||||
|
||||
if (parameters->outFile) {
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
outputData(parameters->outFile, h_Ax_vector, dim);
|
||||
}
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
||||
//free((void *)clSource[0]);
|
||||
|
||||
free(h_data);
|
||||
free(h_indices);
|
||||
free(h_ptr);
|
||||
free(h_perm);
|
||||
free(h_nzcnt);
|
||||
free(h_Ax_vector);
|
||||
free(h_x_vector);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
|
||||
|
||||
pb_PrintTimerSet(&timers);
|
||||
pb_FreeParameters(parameters);
|
||||
|
||||
return 0;
|
||||
}
|
509
benchmarks/opencl/spmv/mmio.c
Normal file
509
benchmarks/opencl/spmv/mmio.c
Normal file
|
@ -0,0 +1,509 @@
|
|||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include "mmio.h"
|
||||
|
||||
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
|
||||
double **val_, int **I_, int **J_)
|
||||
{
|
||||
FILE *f;
|
||||
MM_typecode matcode;
|
||||
int M, N, nz;
|
||||
int i;
|
||||
double *val;
|
||||
int *I, *J;
|
||||
|
||||
if ((f = fopen(fname, "r")) == NULL)
|
||||
return -1;
|
||||
|
||||
|
||||
if (mm_read_banner(f, &matcode) != 0)
|
||||
{
|
||||
printf("mm_read_unsymetric: Could not process Matrix Market banner ");
|
||||
printf(" in file [%s]\n", fname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
|
||||
mm_is_sparse(matcode)))
|
||||
{
|
||||
fprintf(stderr, "Sorry, this application does not support ");
|
||||
fprintf(stderr, "Market Market type: [%s]\n",
|
||||
mm_typecode_to_str(matcode));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* find out size of sparse matrix: M, N, nz .... */
|
||||
|
||||
if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
|
||||
{
|
||||
fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
*M_ = M;
|
||||
*N_ = N;
|
||||
*nz_ = nz;
|
||||
|
||||
/* reseve memory for matrices */
|
||||
|
||||
I = (int *) malloc(nz * sizeof(int));
|
||||
J = (int *) malloc(nz * sizeof(int));
|
||||
val = (double *) malloc(nz * sizeof(double));
|
||||
|
||||
*val_ = val;
|
||||
*I_ = I;
|
||||
*J_ = J;
|
||||
|
||||
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
|
||||
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
|
||||
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
|
||||
|
||||
for (i=0; i<nz; i++)
|
||||
{
|
||||
fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
|
||||
I[i]--; /* adjust from 1-based to 0-based */
|
||||
J[i]--;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_is_valid(MM_typecode matcode)
|
||||
{
|
||||
if (!mm_is_matrix(matcode)) return 0;
|
||||
if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
|
||||
if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
|
||||
if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
|
||||
mm_is_skew(matcode))) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int mm_read_banner(FILE *f, MM_typecode *matcode)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
char banner[MM_MAX_TOKEN_LENGTH];
|
||||
char mtx[MM_MAX_TOKEN_LENGTH];
|
||||
char crd[MM_MAX_TOKEN_LENGTH];
|
||||
char data_type[MM_MAX_TOKEN_LENGTH];
|
||||
char storage_scheme[MM_MAX_TOKEN_LENGTH];
|
||||
char *p;
|
||||
|
||||
|
||||
mm_clear_typecode(matcode);
|
||||
|
||||
if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
|
||||
storage_scheme) != 5)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
|
||||
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
|
||||
|
||||
/* check for banner */
|
||||
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
|
||||
return MM_NO_HEADER;
|
||||
|
||||
/* first field should be "mtx" */
|
||||
if (strcmp(mtx, MM_MTX_STR) != 0)
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
mm_set_matrix(matcode);
|
||||
|
||||
|
||||
/* second field describes whether this is a sparse matrix (in coordinate
|
||||
storgae) or a dense array */
|
||||
|
||||
|
||||
if (strcmp(crd, MM_SPARSE_STR) == 0)
|
||||
mm_set_sparse(matcode);
|
||||
else
|
||||
if (strcmp(crd, MM_DENSE_STR) == 0)
|
||||
mm_set_dense(matcode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* third field */
|
||||
|
||||
if (strcmp(data_type, MM_REAL_STR) == 0)
|
||||
mm_set_real(matcode);
|
||||
else
|
||||
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
|
||||
mm_set_complex(matcode);
|
||||
else
|
||||
if (strcmp(data_type, MM_PATTERN_STR) == 0)
|
||||
mm_set_pattern(matcode);
|
||||
else
|
||||
if (strcmp(data_type, MM_INT_STR) == 0)
|
||||
mm_set_integer(matcode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* fourth field */
|
||||
|
||||
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
|
||||
mm_set_general(matcode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
|
||||
mm_set_symmetric(matcode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
|
||||
mm_set_hermitian(matcode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
|
||||
mm_set_skew(matcode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
|
||||
{
|
||||
if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
|
||||
/* set return null parameter values, in case we exit with errors */
|
||||
*M = *N = *nz = 0;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
|
||||
return 0;
|
||||
|
||||
else
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int mm_read_mtx_array_size(FILE *f, int *M, int *N)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
/* set return null parameter values, in case we exit with errors */
|
||||
*M = *N = 0;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d", M, N) == 2)
|
||||
return 0;
|
||||
|
||||
else /* we have a blank line */
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(f, "%d %d", M, N);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_write_mtx_array_size(FILE *f, int M, int N)
|
||||
{
|
||||
if (fprintf(f, "%d %d\n", M, N) != 2)
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------*/
|
||||
|
||||
/******************************************************************/
|
||||
/* use when I[], J[], and val[]J, and val[] are already allocated */
|
||||
/******************************************************************/
|
||||
|
||||
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode)
|
||||
{
|
||||
int i;
|
||||
if (mm_is_complex(matcode))
|
||||
{
|
||||
for (i=0; i<nz; i++)
|
||||
if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
|
||||
!= 4) return MM_PREMATURE_EOF;
|
||||
}
|
||||
else if (mm_is_real(matcode))
|
||||
{
|
||||
for (i=0; i<nz; i++)
|
||||
{
|
||||
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
|
||||
!= 3) return MM_PREMATURE_EOF;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
else if (mm_is_pattern(matcode))
|
||||
{
|
||||
for (i=0; i<nz; i++)
|
||||
if (fscanf(f, "%d %d", &I[i], &J[i])
|
||||
!= 2) return MM_PREMATURE_EOF;
|
||||
}
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
|
||||
double *real, double *imag, MM_typecode matcode)
|
||||
{
|
||||
if (mm_is_complex(matcode))
|
||||
{
|
||||
if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
|
||||
!= 4) return MM_PREMATURE_EOF;
|
||||
}
|
||||
else if (mm_is_real(matcode))
|
||||
{
|
||||
if (fscanf(f, "%d %d %lg\n", I, J, real)
|
||||
!= 3) return MM_PREMATURE_EOF;
|
||||
|
||||
}
|
||||
|
||||
else if (mm_is_pattern(matcode))
|
||||
{
|
||||
if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
|
||||
}
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
mm_read_mtx_crd() fills M, N, nz, array of values, and return
|
||||
type code, e.g. 'MCRS'
|
||||
|
||||
if matrix is complex, values[] is of size 2*nz,
|
||||
(nz pairs of real/imaginary values)
|
||||
************************************************************************/
|
||||
|
||||
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
|
||||
double **val, MM_typecode *matcode)
|
||||
{
|
||||
int ret_code;
|
||||
FILE *f;
|
||||
|
||||
if (strcmp(fname, "stdin") == 0) f=stdin;
|
||||
else
|
||||
if ((f = fopen(fname, "r")) == NULL)
|
||||
return MM_COULD_NOT_READ_FILE;
|
||||
|
||||
|
||||
if ((ret_code = mm_read_banner(f, matcode)) != 0)
|
||||
return ret_code;
|
||||
|
||||
if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
|
||||
mm_is_matrix(*matcode)))
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
|
||||
return ret_code;
|
||||
|
||||
|
||||
*I = (int *) malloc(*nz * sizeof(int));
|
||||
*J = (int *) malloc(*nz * sizeof(int));
|
||||
*val = NULL;
|
||||
|
||||
if (mm_is_complex(*matcode))
|
||||
{
|
||||
*val = (double *) malloc(*nz * 2 * sizeof(double));
|
||||
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
|
||||
*matcode);
|
||||
if (ret_code != 0) return ret_code;
|
||||
}
|
||||
else if (mm_is_real(*matcode))
|
||||
{
|
||||
*val = (double *) malloc(*nz * sizeof(double));
|
||||
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
|
||||
*matcode);
|
||||
if (ret_code != 0) return ret_code;
|
||||
}
|
||||
|
||||
else if (mm_is_pattern(*matcode))
|
||||
{
|
||||
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
|
||||
*matcode);
|
||||
if (ret_code != 0) return ret_code;
|
||||
}
|
||||
|
||||
if (f != stdin) fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_write_banner(FILE *f, MM_typecode matcode)
|
||||
{
|
||||
char *str = mm_typecode_to_str(matcode);
|
||||
int ret_code;
|
||||
|
||||
ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
|
||||
free(str);
|
||||
if (ret_code !=2 )
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode)
|
||||
{
|
||||
FILE *f;
|
||||
int i;
|
||||
|
||||
if (strcmp(fname, "stdout") == 0)
|
||||
f = stdout;
|
||||
else
|
||||
if ((f = fopen(fname, "w")) == NULL)
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
|
||||
/* print banner followed by typecode */
|
||||
fprintf(f, "%s ", MatrixMarketBanner);
|
||||
fprintf(f, "%s\n", mm_typecode_to_str(matcode));
|
||||
|
||||
/* print matrix sizes and nonzeros */
|
||||
fprintf(f, "%d %d %d\n", M, N, nz);
|
||||
|
||||
/* print values */
|
||||
if (mm_is_pattern(matcode))
|
||||
for (i=0; i<nz; i++)
|
||||
fprintf(f, "%d %d\n", I[i], J[i]);
|
||||
else
|
||||
if (mm_is_real(matcode))
|
||||
for (i=0; i<nz; i++)
|
||||
fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
|
||||
else
|
||||
if (mm_is_complex(matcode))
|
||||
for (i=0; i<nz; i++)
|
||||
fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
|
||||
val[2*i+1]);
|
||||
else
|
||||
{
|
||||
if (f != stdout) fclose(f);
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
}
|
||||
|
||||
if (f !=stdout) fclose(f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a new copy of a string s. mm_strdup() is a common routine, but
|
||||
* not part of ANSI C, so it is included here. Used by mm_typecode_to_str().
|
||||
*
|
||||
*/
|
||||
char *mm_strdup(const char *s)
|
||||
{
|
||||
int len = strlen(s);
|
||||
char *s2 = (char *) malloc((len+1)*sizeof(char));
|
||||
return strcpy(s2, s);
|
||||
}
|
||||
|
||||
char *mm_typecode_to_str(MM_typecode matcode)
|
||||
{
|
||||
char buffer[MM_MAX_LINE_LENGTH];
|
||||
char *types[4];
|
||||
char *mm_strdup(const char *);
|
||||
int error =0;
|
||||
|
||||
/* check for MTX type */
|
||||
if (mm_is_matrix(matcode))
|
||||
types[0] = MM_MTX_STR;
|
||||
else
|
||||
error=1;
|
||||
|
||||
/* check for CRD or ARR matrix */
|
||||
if (mm_is_sparse(matcode))
|
||||
types[1] = MM_SPARSE_STR;
|
||||
else
|
||||
if (mm_is_dense(matcode))
|
||||
types[1] = MM_DENSE_STR;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
/* check for element data type */
|
||||
if (mm_is_real(matcode))
|
||||
types[2] = MM_REAL_STR;
|
||||
else
|
||||
if (mm_is_complex(matcode))
|
||||
types[2] = MM_COMPLEX_STR;
|
||||
else
|
||||
if (mm_is_pattern(matcode))
|
||||
types[2] = MM_PATTERN_STR;
|
||||
else
|
||||
if (mm_is_integer(matcode))
|
||||
types[2] = MM_INT_STR;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
|
||||
/* check for symmetry type */
|
||||
if (mm_is_general(matcode))
|
||||
types[3] = MM_GENERAL_STR;
|
||||
else
|
||||
if (mm_is_symmetric(matcode))
|
||||
types[3] = MM_SYMM_STR;
|
||||
else
|
||||
if (mm_is_hermitian(matcode))
|
||||
types[3] = MM_HERM_STR;
|
||||
else
|
||||
if (mm_is_skew(matcode))
|
||||
types[3] = MM_SKEW_STR;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
|
||||
return mm_strdup(buffer);
|
||||
|
||||
}
|
135
benchmarks/opencl/spmv/mmio.h
Normal file
135
benchmarks/opencl/spmv/mmio.h
Normal file
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MM_IO_H
|
||||
#define MM_IO_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define MM_MAX_LINE_LENGTH 1025
|
||||
#define MatrixMarketBanner "%%MatrixMarket"
|
||||
#define MM_MAX_TOKEN_LENGTH 64
|
||||
|
||||
typedef char MM_typecode[4];
|
||||
|
||||
char *mm_typecode_to_str(MM_typecode matcode);
|
||||
|
||||
int mm_read_banner(FILE *f, MM_typecode *matcode);
|
||||
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
|
||||
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
|
||||
|
||||
int mm_write_banner(FILE *f, MM_typecode matcode);
|
||||
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
|
||||
int mm_write_mtx_array_size(FILE *f, int M, int N);
|
||||
|
||||
|
||||
/********************* MM_typecode query fucntions ***************************/
|
||||
|
||||
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
|
||||
|
||||
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
|
||||
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
|
||||
#define mm_is_dense(typecode) ((typecode)[1]=='A')
|
||||
#define mm_is_array(typecode) ((typecode)[1]=='A')
|
||||
|
||||
#define mm_is_complex(typecode) ((typecode)[2]=='C')
|
||||
#define mm_is_real(typecode) ((typecode)[2]=='R')
|
||||
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
|
||||
#define mm_is_integer(typecode) ((typecode)[2]=='I')
|
||||
|
||||
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
|
||||
#define mm_is_general(typecode) ((typecode)[3]=='G')
|
||||
#define mm_is_skew(typecode) ((typecode)[3]=='K')
|
||||
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
|
||||
|
||||
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
|
||||
|
||||
|
||||
/********************* MM_typecode modify fucntions ***************************/
|
||||
|
||||
#define mm_set_matrix(typecode) ((*typecode)[0]='M')
|
||||
#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
|
||||
#define mm_set_array(typecode) ((*typecode)[1]='A')
|
||||
#define mm_set_dense(typecode) mm_set_array(typecode)
|
||||
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
|
||||
|
||||
#define mm_set_complex(typecode)((*typecode)[2]='C')
|
||||
#define mm_set_real(typecode) ((*typecode)[2]='R')
|
||||
#define mm_set_pattern(typecode)((*typecode)[2]='P')
|
||||
#define mm_set_integer(typecode)((*typecode)[2]='I')
|
||||
|
||||
|
||||
#define mm_set_symmetric(typecode)((*typecode)[3]='S')
|
||||
#define mm_set_general(typecode)((*typecode)[3]='G')
|
||||
#define mm_set_skew(typecode) ((*typecode)[3]='K')
|
||||
#define mm_set_hermitian(typecode)((*typecode)[3]='H')
|
||||
|
||||
#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
|
||||
(*typecode)[2]=' ',(*typecode)[3]='G')
|
||||
|
||||
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
|
||||
|
||||
|
||||
/********************* Matrix Market error codes ***************************/
|
||||
|
||||
|
||||
#define MM_COULD_NOT_READ_FILE 11
|
||||
#define MM_PREMATURE_EOF 12
|
||||
#define MM_NOT_MTX 13
|
||||
#define MM_NO_HEADER 14
|
||||
#define MM_UNSUPPORTED_TYPE 15
|
||||
#define MM_LINE_TOO_LONG 16
|
||||
#define MM_COULD_NOT_WRITE_FILE 17
|
||||
|
||||
|
||||
/******************** Matrix Market internal definitions ********************
|
||||
|
||||
MM_matrix_typecode: 4-character sequence
|
||||
|
||||
ojbect sparse/ data storage
|
||||
dense type scheme
|
||||
|
||||
string position: [0] [1] [2] [3]
|
||||
|
||||
Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
|
||||
A(array) C(omplex) H(ermitian)
|
||||
P(attern) S(ymmetric)
|
||||
I(nteger) K(kew)
|
||||
|
||||
***********************************************************************/
|
||||
|
||||
#define MM_MTX_STR "matrix"
|
||||
#define MM_ARRAY_STR "array"
|
||||
#define MM_DENSE_STR "array"
|
||||
#define MM_COORDINATE_STR "coordinate"
|
||||
#define MM_SPARSE_STR "coordinate"
|
||||
#define MM_COMPLEX_STR "complex"
|
||||
#define MM_REAL_STR "real"
|
||||
#define MM_INT_STR "integer"
|
||||
#define MM_GENERAL_STR "general"
|
||||
#define MM_SYMM_STR "symmetric"
|
||||
#define MM_HERM_STR "hermitian"
|
||||
#define MM_SKEW_STR "skew-symmetric"
|
||||
#define MM_PATTERN_STR "pattern"
|
||||
|
||||
|
||||
/* high level routines */
|
||||
|
||||
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
|
||||
MM_typecode matcode);
|
||||
|
||||
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
|
||||
double **val_, int **I_, int **J_);
|
||||
|
||||
|
||||
|
||||
#endif
|
50
benchmarks/opencl/spmv/ocl.c
Normal file
50
benchmarks/opencl/spmv/ocl.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include <CL/cl.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ocl.h"
|
||||
|
||||
char* readFile(const char* fileName)
|
||||
{
|
||||
FILE* fp;
|
||||
fp = fopen(fileName,"r");
|
||||
if(fp == NULL)
|
||||
{
|
||||
printf("Error 1!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fseek(fp,0,SEEK_END);
|
||||
long size = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* buffer = (char*)malloc(sizeof(char)*(size+1));
|
||||
if(buffer == NULL)
|
||||
{
|
||||
printf("Error 2!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t res = fread(buffer,1,size,fp);
|
||||
if(res != size)
|
||||
{
|
||||
printf("Error 3!\n");
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
buffer[size] = 0;
|
||||
fclose(fp);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
|
||||
{
|
||||
cl_int clStatus;
|
||||
char* temp = (char*)malloc(size);
|
||||
memset(temp,val,size);
|
||||
clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
|
||||
CHECK_ERROR("clEnqueueWriteBuffer")
|
||||
free(temp);
|
||||
}
|
21
benchmarks/opencl/spmv/ocl.h
Normal file
21
benchmarks/opencl/spmv/ocl.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
#ifndef __OCLH__
|
||||
#define __OCLH__
|
||||
|
||||
typedef struct {
|
||||
cl_uint major;
|
||||
cl_uint minor;
|
||||
cl_uint multiProcessorCount;
|
||||
} OpenCLDeviceProp;
|
||||
|
||||
void clMemSet(cl_command_queue, cl_mem, int, size_t);
|
||||
char* readFile(const char*);
|
||||
|
||||
#define CHECK_ERROR(errorMessage) \
|
||||
if(clStatus != CL_SUCCESS) \
|
||||
{ \
|
||||
printf("Error: %s!\n",errorMessage); \
|
||||
printf("Line: %d\n",__LINE__); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
#endif
|
427
benchmarks/opencl/spmv/parboil.c
Normal file
427
benchmarks/opencl/spmv/parboil.c
Normal file
|
@ -0,0 +1,427 @@
|
|||
/*
|
||||
* (c) 2007 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
|
||||
#include <parboil.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Timer routines */
|
||||
|
||||
static void
|
||||
accumulate_time(pb_Timestamp *accum,
|
||||
pb_Timestamp start,
|
||||
pb_Timestamp end)
|
||||
{
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
*accum += end - start;
|
||||
#else
|
||||
# error "Timestamps not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
static pb_Timestamp get_time()
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec);
|
||||
}
|
||||
#else
|
||||
# error "no supported time libraries are available on this platform"
|
||||
#endif
|
||||
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer)
|
||||
{
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
timer->elapsed = 0;
|
||||
#else
|
||||
# error "pb_ResetTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer)
|
||||
{
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Ignoring attempt to start a running timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer)
|
||||
{
|
||||
unsigned int numNotStopped = 0x3; // 11
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Timer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_STOPPED) {
|
||||
fputs("Warning: Subtimer was not stopped\n", stderr);
|
||||
numNotStopped &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotStopped == 0x0) {
|
||||
fputs("Ignoring attempt to start running timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_RUNNING;
|
||||
subtimer->state = pb_Timer_RUNNING;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
if (numNotStopped & 0x2) {
|
||||
timer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
|
||||
if (numNotStopped & 0x1) {
|
||||
subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
}
|
||||
#else
|
||||
# error "pb_StartTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer)
|
||||
{
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Ignoring attempt to stop a stopped timer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
|
||||
}
|
||||
|
||||
void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) {
|
||||
|
||||
pb_Timestamp fini;
|
||||
|
||||
unsigned int numNotRunning = 0x3; // 0b11
|
||||
if (timer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Timer was not running\n", stderr);
|
||||
numNotRunning &= 0x1; // Zero out 2^1
|
||||
}
|
||||
if (subtimer->state != pb_Timer_RUNNING) {
|
||||
fputs("Warning: Subtimer was not running\n", stderr);
|
||||
numNotRunning &= 0x2; // Zero out 2^0
|
||||
}
|
||||
if (numNotRunning == 0x0) {
|
||||
fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
timer->state = pb_Timer_STOPPED;
|
||||
subtimer->state = pb_Timer_STOPPED;
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
{
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
fini = tv.tv_sec * 1000000LL + tv.tv_usec;
|
||||
}
|
||||
#else
|
||||
# error "pb_StopTimer: not implemented for this system"
|
||||
#endif
|
||||
|
||||
if (numNotRunning & 0x2) {
|
||||
accumulate_time(&timer->elapsed, timer->init, fini);
|
||||
timer->init = fini;
|
||||
}
|
||||
|
||||
if (numNotRunning & 0x1) {
|
||||
accumulate_time(&subtimer->elapsed, subtimer->init, fini);
|
||||
subtimer->init = fini;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer)
|
||||
{
|
||||
double ret;
|
||||
|
||||
if (timer->state != pb_Timer_STOPPED) {
|
||||
fputs("Elapsed time from a running timer is inaccurate\n", stderr);
|
||||
}
|
||||
|
||||
#if _POSIX_VERSION >= 200112L
|
||||
ret = timer->elapsed / 1e6;
|
||||
#else
|
||||
# error "pb_GetElapsedTime: not implemented for this system"
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
int n;
|
||||
|
||||
timers->wall_begin = get_time();
|
||||
|
||||
timers->current = pb_TimerID_NONE;
|
||||
|
||||
timers->async_markers = NULL;
|
||||
|
||||
|
||||
for (n = 0; n < pb_TimerID_LAST; n++) {
|
||||
pb_ResetTimer(&timers->timers[n]);
|
||||
timers->sub_timer_list[n] = NULL; // free first?
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {
|
||||
|
||||
struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc
|
||||
(sizeof(struct pb_SubTimer));
|
||||
|
||||
int len = strlen(label);
|
||||
|
||||
subtimer->label = (char *) malloc (sizeof(char)*(len+1));
|
||||
sprintf(subtimer->label, "%s\0", label);
|
||||
|
||||
pb_ResetTimer(&subtimer->timer);
|
||||
subtimer->next = NULL;
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category];
|
||||
if (subtimerlist == NULL) {
|
||||
subtimerlist = (struct pb_SubTimerList *) malloc
|
||||
(sizeof(struct pb_SubTimerList));
|
||||
subtimerlist->subtimer_list = subtimer;
|
||||
timers->sub_timer_list[pb_Category] = subtimerlist;
|
||||
} else {
|
||||
// Append to list
|
||||
struct pb_SubTimer *element = subtimerlist->subtimer_list;
|
||||
while (element->next != NULL) {
|
||||
element = element->next;
|
||||
}
|
||||
element->next = subtimer;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category)
|
||||
{
|
||||
|
||||
// switchToSub( NULL, NONE
|
||||
// switchToSub( NULL, some
|
||||
// switchToSub( some, some
|
||||
// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed
|
||||
|
||||
struct pb_Timer *topLevelToStop = NULL;
|
||||
if (timers->current != category && timers->current != pb_TimerID_NONE) {
|
||||
// Switching to subtimer in a different category needs to stop the top-level current, different categoried timer.
|
||||
// NONE shouldn't have a timer associated with it, so exclude from branch
|
||||
topLevelToStop = &timers->timers[timers->current];
|
||||
}
|
||||
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current;
|
||||
|
||||
if (timers->current != pb_TimerID_NONE) {
|
||||
if (curr != NULL && topLevelToStop != NULL) {
|
||||
pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer);
|
||||
} else if (curr != NULL) {
|
||||
pb_StopTimer(&curr->timer);
|
||||
} else {
|
||||
pb_StopTimer(topLevelToStop);
|
||||
}
|
||||
}
|
||||
|
||||
subtimerlist = timers->sub_timer_list[category];
|
||||
struct pb_SubTimer *subtimer = NULL;
|
||||
|
||||
if (label != NULL) {
|
||||
subtimer = subtimerlist->subtimer_list;
|
||||
while (subtimer != NULL) {
|
||||
if (strcmp(subtimer->label, label) == 0) {
|
||||
break;
|
||||
} else {
|
||||
subtimer = subtimer->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (category != pb_TimerID_NONE) {
|
||||
|
||||
if (subtimerlist != NULL) {
|
||||
subtimerlist->current = subtimer;
|
||||
}
|
||||
|
||||
if (category != timers->current && subtimer != NULL) {
|
||||
pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer);
|
||||
} else if (subtimer != NULL) {
|
||||
// Same category, different non-NULL subtimer
|
||||
pb_StartTimer(&subtimer->timer);
|
||||
} else{
|
||||
// Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer
|
||||
pb_StartTimer(&timers->timers[category]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = category;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer)
|
||||
{
|
||||
/* Stop the currently running timer */
|
||||
/*if (timers->current != pb_TimerID_NONE) {
|
||||
struct pb_SubTimer *currSubTimer = NULL;
|
||||
struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current];
|
||||
|
||||
if ( subtimerlist != NULL) {
|
||||
currSubTimer = timers->sub_timer_list[timers->current]->current;
|
||||
}
|
||||
if ( currSubTimer!= NULL) {
|
||||
pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer);
|
||||
} else {
|
||||
pb_StopTimer(&timers->timers[timers->current]);
|
||||
}
|
||||
}
|
||||
|
||||
timers->current = timer;
|
||||
|
||||
if (timer != pb_TimerID_NONE) {
|
||||
pb_StartTimer(&timers->timers[timer]);
|
||||
}*/
|
||||
}
|
||||
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers)
|
||||
{
|
||||
|
||||
pb_Timestamp wall_end = get_time();
|
||||
|
||||
struct pb_Timer *t = timers->timers;
|
||||
struct pb_SubTimer* sub = NULL;
|
||||
|
||||
int maxSubLength;
|
||||
|
||||
const char *categories[] = {
|
||||
"IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute"
|
||||
};
|
||||
|
||||
const int maxCategoryLength = 10;
|
||||
|
||||
int i;
|
||||
for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format
|
||||
if(pb_GetElapsedTime(&t[i]) != 0) {
|
||||
|
||||
// Print Category Timer
|
||||
printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i]));
|
||||
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
maxSubLength = 0;
|
||||
while (sub != NULL) {
|
||||
// Find longest SubTimer label
|
||||
if (strlen(sub->label) > maxSubLength) {
|
||||
maxSubLength = strlen(sub->label);
|
||||
}
|
||||
sub = sub->next;
|
||||
}
|
||||
|
||||
// Fit to Categories
|
||||
if (maxSubLength <= maxCategoryLength) {
|
||||
maxSubLength = maxCategoryLength;
|
||||
}
|
||||
|
||||
sub = timers->sub_timer_list[i]->subtimer_list;
|
||||
|
||||
// Print SubTimers
|
||||
while (sub != NULL) {
|
||||
printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer));
|
||||
sub = sub->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0)
|
||||
printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]));
|
||||
|
||||
float walltime = (wall_end - timers->wall_begin)/ 1e6;
|
||||
printf("Timer Wall Time: %f\n", walltime);
|
||||
|
||||
}
|
||||
|
||||
void pb_DestroyTimerSet(struct pb_TimerSet * timers)
|
||||
{
|
||||
/* clean up all of the async event markers */
|
||||
struct pb_async_time_marker_list ** event = &(timers->async_markers);
|
||||
while( *event != NULL) {
|
||||
struct pb_async_time_marker_list ** next = &((*event)->next);
|
||||
free(*event);
|
||||
(*event) = NULL;
|
||||
event = next;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for(i = 0; i < pb_TimerID_LAST; ++i) {
|
||||
if (timers->sub_timer_list[i] != NULL) {
|
||||
struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list;
|
||||
struct pb_SubTimer *prev = NULL;
|
||||
while (subtimer != NULL) {
|
||||
free(subtimer->label);
|
||||
prev = subtimer;
|
||||
subtimer = subtimer->next;
|
||||
free(prev);
|
||||
}
|
||||
free(timers->sub_timer_list[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
348
benchmarks/opencl/spmv/parboil.h
Normal file
348
benchmarks/opencl/spmv/parboil.h
Normal file
|
@ -0,0 +1,348 @@
|
|||
/*
|
||||
* (c) 2010 The Board of Trustees of the University of Illinois.
|
||||
*/
|
||||
#ifndef PARBOIL_HEADER
|
||||
#define PARBOIL_HEADER
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
/* A platform as specified by the user on the command line */
|
||||
struct pb_PlatformParam {
|
||||
char *name; /* The platform name. This string is owned. */
|
||||
char *version; /* The platform version; may be NULL.
|
||||
* This string is owned. */
|
||||
};
|
||||
|
||||
/* Create a PlatformParam from the given strings.
|
||||
* 'name' must not be NULL. 'version' may be NULL.
|
||||
* If not NULL, the strings should have been allocated by malloc(),
|
||||
* and they will be owned by the returned object.
|
||||
*/
|
||||
struct pb_PlatformParam *
|
||||
pb_PlatformParam(char *name, char *version);
|
||||
|
||||
void
|
||||
pb_FreePlatformParam(struct pb_PlatformParam *);
|
||||
|
||||
/* A criterion for how to select a device */
|
||||
enum pb_DeviceSelectionCriterion {
|
||||
pb_Device_INDEX, /* Enumerate the devices and select one
|
||||
* by its number */
|
||||
pb_Device_CPU, /* Select a CPU device */
|
||||
pb_Device_GPU, /* Select a GPU device */
|
||||
pb_Device_ACCELERATOR, /* Select an accelerator device */
|
||||
pb_Device_NAME /* Select a device by name */
|
||||
};
|
||||
|
||||
/* A device as specified by the user on the command line */
|
||||
struct pb_DeviceParam {
|
||||
enum pb_DeviceSelectionCriterion criterion;
|
||||
union {
|
||||
int index; /* If criterion == pb_Device_INDEX,
|
||||
* the index of the device */
|
||||
char *name; /* If criterion == pb_Device_NAME,
|
||||
* the name of the device.
|
||||
* This string is owned. */
|
||||
};
|
||||
};
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_index(int index);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_cpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_gpu(void);
|
||||
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_accelerator(void);
|
||||
|
||||
/* Create a by-name device selection criterion.
|
||||
* The string should have been allocated by malloc(), and it will will be
|
||||
* owned by the returned object.
|
||||
*/
|
||||
struct pb_DeviceParam *
|
||||
pb_DeviceParam_name(char *name);
|
||||
|
||||
void
|
||||
pb_FreeDeviceParam(struct pb_DeviceParam *);
|
||||
|
||||
/* Command line parameters for benchmarks */
|
||||
struct pb_Parameters {
|
||||
char *outFile; /* If not NULL, the raw output of the
|
||||
* computation should be saved to this
|
||||
* file. The string is owned. */
|
||||
char **inpFiles; /* A NULL-terminated array of strings
|
||||
* holding the input file(s) for the
|
||||
* computation. The array and strings
|
||||
* are owned. */
|
||||
struct pb_PlatformParam *platform; /* If not NULL, the platform
|
||||
* specified on the command line. */
|
||||
struct pb_DeviceParam *device; /* If not NULL, the device
|
||||
* specified on the command line. */
|
||||
};
|
||||
|
||||
/* Read command-line parameters.
|
||||
*
|
||||
* The argc and argv parameters to main are read, and any parameters
|
||||
* interpreted by this function are removed from the argument list.
|
||||
*
|
||||
* A new instance of struct pb_Parameters is returned.
|
||||
* If there is an error, then an error message is printed on stderr
|
||||
* and NULL is returned.
|
||||
*/
|
||||
struct pb_Parameters *
|
||||
pb_ReadParameters(int *_argc, char **argv);
|
||||
|
||||
/* Free an instance of struct pb_Parameters.
|
||||
*/
|
||||
void
|
||||
pb_FreeParameters(struct pb_Parameters *p);
|
||||
|
||||
void
|
||||
pb_FreeStringArray(char **);
|
||||
|
||||
/* Count the number of input files in a pb_Parameters instance.
|
||||
*/
|
||||
int
|
||||
pb_Parameters_CountInputs(struct pb_Parameters *p);
|
||||
|
||||
/* A time or duration. */
|
||||
//#if _POSIX_VERSION >= 200112L
|
||||
typedef unsigned long long pb_Timestamp; /* time in microseconds */
|
||||
//#else
|
||||
//# error "Timestamps not implemented"
|
||||
//#endif
|
||||
|
||||
enum pb_TimerState {
|
||||
pb_Timer_STOPPED,
|
||||
pb_Timer_RUNNING,
|
||||
};
|
||||
|
||||
struct pb_Timer {
|
||||
enum pb_TimerState state;
|
||||
pb_Timestamp elapsed; /* Amount of time elapsed so far */
|
||||
pb_Timestamp init; /* Beginning of the current time interval,
|
||||
* if state is RUNNING. End of the last
|
||||
* recorded time interfal otherwise. */
|
||||
};
|
||||
|
||||
/* Reset a timer.
|
||||
* Use this to initialize a timer or to clear
|
||||
* its elapsed time. The reset timer is stopped.
|
||||
*/
|
||||
void
|
||||
pb_ResetTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Start a timer. The timer is set to RUNNING mode and
|
||||
* time elapsed while the timer is running is added to
|
||||
* the timer.
|
||||
* The timer should not already be running.
|
||||
*/
|
||||
void
|
||||
pb_StartTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Stop a timer.
|
||||
* This stops adding elapsed time to the timer.
|
||||
* The timer should not already be stopped.
|
||||
*/
|
||||
void
|
||||
pb_StopTimer(struct pb_Timer *timer);
|
||||
|
||||
/* Get the elapsed time in seconds. */
|
||||
double
|
||||
pb_GetElapsedTime(struct pb_Timer *timer);
|
||||
|
||||
/* Execution time is assigned to one of these categories. */
|
||||
enum pb_TimerID {
|
||||
pb_TimerID_NONE = 0,
|
||||
pb_TimerID_IO, /* Time spent in input/output */
|
||||
pb_TimerID_KERNEL, /* Time spent computing on the device,
|
||||
* recorded asynchronously */
|
||||
pb_TimerID_COPY, /* Time spent synchronously moving data
|
||||
* to/from device and allocating/freeing
|
||||
* memory on the device */
|
||||
pb_TimerID_DRIVER, /* Time spent in the host interacting with the
|
||||
* driver, primarily for recording the time
|
||||
* spent queueing asynchronous operations */
|
||||
pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */
|
||||
pb_TimerID_COMPUTE, /* Time for all program execution other
|
||||
* than parsing command line arguments,
|
||||
* I/O, kernel, and copy */
|
||||
pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and
|
||||
* host activity: automatically filled in,
|
||||
* not intended for direct usage */
|
||||
pb_TimerID_LAST /* Number of timer IDs */
|
||||
};
|
||||
|
||||
/* Dynamic list of asynchronously tracked times between events */
|
||||
struct pb_async_time_marker_list {
|
||||
char *label; // actually just a pointer to a string
|
||||
enum pb_TimerID timerID; /* The ID to which the interval beginning
|
||||
* with this marker should be attributed */
|
||||
void * marker;
|
||||
//cudaEvent_t marker; /* The driver event for this marker */
|
||||
struct pb_async_time_marker_list *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimer {
|
||||
char *label;
|
||||
struct pb_Timer timer;
|
||||
struct pb_SubTimer *next;
|
||||
};
|
||||
|
||||
struct pb_SubTimerList {
|
||||
struct pb_SubTimer *current;
|
||||
struct pb_SubTimer *subtimer_list;
|
||||
};
|
||||
|
||||
/* A set of timers for recording execution times. */
|
||||
struct pb_TimerSet {
|
||||
enum pb_TimerID current;
|
||||
struct pb_async_time_marker_list* async_markers;
|
||||
pb_Timestamp async_begin;
|
||||
pb_Timestamp wall_begin;
|
||||
struct pb_Timer timers[pb_TimerID_LAST];
|
||||
struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST];
|
||||
};
|
||||
|
||||
/* Reset all timers in the set. */
|
||||
void
|
||||
pb_InitializeTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
void
|
||||
pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category);
|
||||
|
||||
/* Select which timer the next interval of time should be accounted
|
||||
* to. The selected timer is started and other timers are stopped.
|
||||
* Using pb_TimerID_NONE stops all timers. */
|
||||
void
|
||||
pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer);
|
||||
|
||||
void
|
||||
pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category);
|
||||
|
||||
/* Print timer values to standard output. */
|
||||
void
|
||||
pb_PrintTimerSet(struct pb_TimerSet *timers);
|
||||
|
||||
/* Release timer resources */
|
||||
void
|
||||
pb_DestroyTimerSet(struct pb_TimerSet * timers);
|
||||
|
||||
void
|
||||
pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr);
|
||||
|
||||
|
||||
typedef struct pb_Device_tag {
|
||||
char* name;
|
||||
void* clDevice;
|
||||
int id;
|
||||
unsigned int in_use;
|
||||
unsigned int available;
|
||||
} pb_Device;
|
||||
|
||||
struct pb_Context_tag;
|
||||
typedef struct pb_Context_tag pb_Context;
|
||||
|
||||
typedef struct pb_Platform_tag {
|
||||
char* name;
|
||||
char* version;
|
||||
void* clPlatform;
|
||||
unsigned int in_use;
|
||||
pb_Context** contexts;
|
||||
pb_Device** devices;
|
||||
} pb_Platform;
|
||||
|
||||
struct pb_Context_tag {
|
||||
void* clPlatformId;
|
||||
void* clContext;
|
||||
void* clDeviceId;
|
||||
pb_Platform* pb_platform;
|
||||
pb_Device* pb_device;
|
||||
};
|
||||
|
||||
// verbosely print out list of platforms and their devices to the console.
|
||||
pb_Platform**
|
||||
pb_GetPlatforms();
|
||||
|
||||
// Choose a platform according to the given platform specification
|
||||
pb_Platform*
|
||||
pb_GetPlatform(struct pb_PlatformParam *platform);
|
||||
|
||||
// choose a platform: by name, name & version
|
||||
pb_Platform*
|
||||
pb_GetPlatformByName(const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByNameAndVersion(const char* name, const char* version);
|
||||
|
||||
// Choose a device according to the given device specification
|
||||
pb_Device*
|
||||
pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device);
|
||||
|
||||
pb_Device**
|
||||
pb_GetDevices(pb_Platform* pb_platform);
|
||||
|
||||
// choose a device by name.
|
||||
pb_Device*
|
||||
pb_GetDeviceByName(pb_Platform* pb_platform, const char* name);
|
||||
|
||||
pb_Platform*
|
||||
pb_GetPlatformByEnvVars();
|
||||
|
||||
pb_Context*
|
||||
pb_InitOpenCLContext(struct pb_Parameters* parameters);
|
||||
|
||||
void
|
||||
pb_ReleasePlatforms();
|
||||
|
||||
void
|
||||
pb_ReleaseContext(pb_Context* c);
|
||||
|
||||
void
|
||||
pb_PrintPlatformInfo(pb_Context* c);
|
||||
|
||||
void
|
||||
perf_init();
|
||||
|
||||
//#define MEASURE_KERNEL_TIME
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef MEASURE_KERNEL_TIME
|
||||
#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c))
|
||||
cl_int
|
||||
pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel /* kernel */,
|
||||
cl_uint /* work_dim */,
|
||||
const size_t * /* global_work_offset */,
|
||||
const size_t * /* global_work_size */,
|
||||
const size_t * /* local_work_size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */);
|
||||
#endif
|
||||
|
||||
enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR };
|
||||
void pb_sig_float(char*, float*, int);
|
||||
void pb_sig_double(char*, double*, int);
|
||||
void pb_sig_short(char*, short*, int);
|
||||
void pb_sig_int(char*, int*, int);
|
||||
void pb_sig_uchar(char*, unsigned char*, unsigned int);
|
||||
void pb_sig_clmem(char*, cl_command_queue, cl_mem, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //PARBOIL_HEADER
|
||||
|
1390
benchmarks/opencl/spmv/parboil_opencl.c
Normal file
1390
benchmarks/opencl/spmv/parboil_opencl.c
Normal file
File diff suppressed because it is too large
Load diff
670
benchmarks/opencl/spmv/perf_util.c
Normal file
670
benchmarks/opencl/spmv/perf_util.c
Normal file
|
@ -0,0 +1,670 @@
|
|||
/*
|
||||
* perf_util.c - helper functions for perf_events
|
||||
*
|
||||
* Copyright (c) 2009 Google, Inc
|
||||
* Contributed by Stephane Eranian <eranian@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
||||
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
||||
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <err.h>
|
||||
|
||||
#include <perfmon/pfmlib_perf_event.h>
|
||||
#include "perf_util.h"
|
||||
|
||||
/* the **fd parameter must point to a null pointer on the first call
|
||||
* max_fds and num_fds must both point to a zero value on the first call
|
||||
* The return value is success (0) vs. failure (non-zero)
|
||||
*/
|
||||
int
|
||||
perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
|
||||
{
|
||||
perf_event_desc_t *fd;
|
||||
pfm_perf_encode_arg_t arg;
|
||||
int new_max, ret, num, max_fds;
|
||||
int group_leader;
|
||||
|
||||
if (!(argv && fds && num_fds))
|
||||
return -1;
|
||||
|
||||
fd = *fds;
|
||||
if (fd) {
|
||||
max_fds = fd[0].max_fds;
|
||||
if (max_fds < 2)
|
||||
return -1;
|
||||
num = *num_fds;
|
||||
} else {
|
||||
max_fds = num = 0; /* bootstrap */
|
||||
}
|
||||
group_leader = num;
|
||||
|
||||
while(*argv) {
|
||||
if (num == max_fds) {
|
||||
if (max_fds == 0)
|
||||
new_max = 2;
|
||||
else
|
||||
new_max = max_fds << 1;
|
||||
|
||||
if (new_max < max_fds) {
|
||||
warn("too many entries");
|
||||
goto error;
|
||||
}
|
||||
fd = realloc(fd, new_max * sizeof(*fd));
|
||||
if (!fd) {
|
||||
warn("cannot allocate memory");
|
||||
goto error;
|
||||
}
|
||||
/* reset newly allocated chunk */
|
||||
memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
|
||||
max_fds = new_max;
|
||||
|
||||
/* update max size */
|
||||
fd[0].max_fds = max_fds;
|
||||
}
|
||||
/* ABI compatibility, set before calling libpfm */
|
||||
fd[num].hw.size = sizeof(fd[num].hw);
|
||||
|
||||
memset(&arg, 0, sizeof(arg));
|
||||
arg.attr = &fd[num].hw;
|
||||
arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */
|
||||
|
||||
ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &arg);
|
||||
if (ret != PFM_SUCCESS) {
|
||||
warnx("event %s: %s", *argv, pfm_strerror(ret));
|
||||
goto error;
|
||||
}
|
||||
|
||||
fd[num].name = strdup(*argv);
|
||||
fd[num].group_leader = group_leader;
|
||||
fd[num].idx = arg.idx;
|
||||
|
||||
num++;
|
||||
argv++;
|
||||
}
|
||||
*num_fds = num;
|
||||
*fds = fd;
|
||||
return 0;
|
||||
error:
|
||||
perf_free_fds(fd, num);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
|
||||
{
|
||||
const char **argv;
|
||||
char *p, *q, *events;
|
||||
int i, ret, num = 0;
|
||||
|
||||
if (!(ev && fd && num_fds))
|
||||
return -1;
|
||||
|
||||
events = strdup(ev);
|
||||
if (!events)
|
||||
return -1;
|
||||
|
||||
q = events;
|
||||
while((p = strchr(q, ','))) {
|
||||
num++;
|
||||
q = p + 1;
|
||||
}
|
||||
num++;
|
||||
num++; /* terminator */
|
||||
|
||||
argv = malloc(num * sizeof(char *));
|
||||
if (!argv) {
|
||||
free(events);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(i=0, q = events; i < num-2; i++, q = p + 1) {
|
||||
p = strchr(q, ',');
|
||||
*p = '\0';
|
||||
argv[i] = q;
|
||||
}
|
||||
argv[i++] = q;
|
||||
argv[i] = NULL;
|
||||
ret = perf_setup_argv_events(argv, fd, num_fds);
|
||||
free(argv);
|
||||
free(events); /* strdup in perf_setup_argv_events() */
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
perf_free_fds(perf_event_desc_t *fds, int num_fds)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < num_fds; i++) {
|
||||
free(fds[i].name);
|
||||
free(fds[i].fstr);
|
||||
}
|
||||
free(fds);
|
||||
}
|
||||
|
||||
int
|
||||
perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
|
||||
{
|
||||
int leader;
|
||||
int i;
|
||||
|
||||
if (idx < 0 || idx >= num)
|
||||
return 0;
|
||||
|
||||
leader = fds[idx].group_leader;
|
||||
|
||||
for (i = leader + 1; i < num; i++) {
|
||||
if (fds[i].group_leader != leader) {
|
||||
/* This is a new group leader, so the previous
|
||||
* event was the final event of the preceding
|
||||
* group.
|
||||
*/
|
||||
return i - leader;
|
||||
}
|
||||
}
|
||||
return i - leader;
|
||||
}
|
||||
|
||||
int
|
||||
perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz)
|
||||
{
|
||||
struct perf_event_mmap_page *hdr = hw->buf;
|
||||
size_t pgmsk = hw->pgmsk;
|
||||
void *data;
|
||||
unsigned long tail;
|
||||
size_t avail_sz, m, c;
|
||||
|
||||
/*
|
||||
* data points to beginning of buffer payload
|
||||
*/
|
||||
data = ((void *)hdr)+sysconf(_SC_PAGESIZE);
|
||||
|
||||
/*
|
||||
* position of tail within the buffer payload
|
||||
*/
|
||||
tail = hdr->data_tail & pgmsk;
|
||||
|
||||
/*
|
||||
* size of what is available
|
||||
*
|
||||
* data_head, data_tail never wrap around
|
||||
*/
|
||||
avail_sz = hdr->data_head - hdr->data_tail;
|
||||
if (sz > avail_sz)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* sz <= avail_sz, we can satisfy the request
|
||||
*/
|
||||
|
||||
/*
|
||||
* c = size till end of buffer
|
||||
*
|
||||
* buffer payload size is necessarily
|
||||
* a power of two, so we can do:
|
||||
*/
|
||||
c = pgmsk + 1 - tail;
|
||||
|
||||
/*
|
||||
* min with requested size
|
||||
*/
|
||||
m = c < sz ? c : sz;
|
||||
|
||||
/* copy beginning */
|
||||
memcpy(buf, data+tail, m);
|
||||
|
||||
/*
|
||||
* copy wrapped around leftover
|
||||
*/
|
||||
if ((sz - m) > 0)
|
||||
memcpy(buf+m, data, sz - m);
|
||||
|
||||
//printf("\nhead=%lx tail=%lx new_tail=%lx sz=%zu\n", hdr->data_head, hdr->data_tail, hdr->data_tail+sz, sz);
|
||||
hdr->data_tail += sz;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
perf_skip_buffer(perf_event_desc_t *hw, size_t sz)
|
||||
{
|
||||
struct perf_event_mmap_page *hdr = hw->buf;
|
||||
|
||||
if ((hdr->data_tail + sz) > hdr->data_head)
|
||||
sz = hdr->data_head - hdr->data_tail;
|
||||
|
||||
hdr->data_tail += sz;
|
||||
}
|
||||
|
||||
static size_t
|
||||
__perf_handle_raw(perf_event_desc_t *hw)
|
||||
{
|
||||
size_t sz = 0;
|
||||
uint32_t raw_sz, i;
|
||||
char *buf;
|
||||
int ret;
|
||||
|
||||
ret = perf_read_buffer_32(hw, &raw_sz);
|
||||
if (ret) {
|
||||
warnx("cannot read raw size");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sz += sizeof(raw_sz);
|
||||
|
||||
printf("\n\tRAWSZ:%u\n", raw_sz);
|
||||
|
||||
buf = malloc(raw_sz);
|
||||
if (!buf) {
|
||||
warn("cannot allocate raw buffer");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
ret = perf_read_buffer(hw, buf, raw_sz);
|
||||
if (ret) {
|
||||
warnx("cannot read raw data");
|
||||
free(buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (raw_sz)
|
||||
putchar('\t');
|
||||
|
||||
for(i=0; i < raw_sz; i++) {
|
||||
printf("0x%02x ", buf[i] & 0xff );
|
||||
if (((i+1) % 16) == 0)
|
||||
printf("\n\t");
|
||||
}
|
||||
if (raw_sz)
|
||||
putchar('\n');
|
||||
|
||||
free(buf);
|
||||
|
||||
return sz + raw_sz;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp)
|
||||
{
|
||||
perf_event_desc_t *hw;
|
||||
struct { uint32_t pid, tid; } pid;
|
||||
struct { uint64_t value, id; } grp;
|
||||
uint64_t time_enabled, time_running;
|
||||
size_t sz;
|
||||
uint64_t type, fmt;
|
||||
uint64_t val64;
|
||||
const char *str;
|
||||
int ret, e;
|
||||
|
||||
if (!fds || !fp || !ehdr || num_fds < 0 || idx < 0 || idx >= num_fds)
|
||||
return -1;
|
||||
|
||||
sz = ehdr->size - sizeof(*ehdr);
|
||||
|
||||
hw = fds+idx;
|
||||
|
||||
type = hw->hw.sample_type;
|
||||
fmt = hw->hw.read_format;
|
||||
|
||||
/*
|
||||
* the sample_type information is laid down
|
||||
* based on the PERF_RECORD_SAMPLE format specified
|
||||
* in the perf_event.h header file.
|
||||
* That order is different from the enum perf_event_sample_format
|
||||
*/
|
||||
if (type & PERF_SAMPLE_IP) {
|
||||
const char *xtra = " ";
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx("cannot read IP");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* MISC_EXACT_IP indicates that kernel is returning
|
||||
* th IIP of an instruction which caused the event, i.e.,
|
||||
* no skid
|
||||
*/
|
||||
if (hw->hw.precise_ip && (ehdr->misc & PERF_RECORD_MISC_EXACT_IP))
|
||||
xtra = " (exact) ";
|
||||
|
||||
fprintf(fp, "IIP:%#016"PRIx64"%s", val64, xtra);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_TID) {
|
||||
ret = perf_read_buffer(hw, &pid, sizeof(pid));
|
||||
if (ret) {
|
||||
warnx( "cannot read PID");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "PID:%d TID:%d ", pid.pid, pid.tid);
|
||||
sz -= sizeof(pid);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_TIME) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read time");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "TIME:%'"PRIu64" ", val64);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_ADDR) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read addr");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "ADDR:%#016"PRIx64" ", val64);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_ID) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read id");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "ID:%"PRIu64" ", val64);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_STREAM_ID) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read stream_id");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "STREAM_ID:%"PRIu64" ", val64);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_CPU) {
|
||||
struct { uint32_t cpu, reserved; } cpu;
|
||||
ret = perf_read_buffer(hw, &cpu, sizeof(cpu));
|
||||
if (ret) {
|
||||
warnx( "cannot read cpu");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "CPU:%u ", cpu.cpu);
|
||||
sz -= sizeof(cpu);
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_PERIOD) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read period");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "PERIOD:%'"PRIu64" ", val64);
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
/* struct read_format {
|
||||
* { u64 value;
|
||||
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
|
||||
* { u64 time_running; } && PERF_FORMAT_RUNNING
|
||||
* { u64 id; } && PERF_FORMAT_ID
|
||||
* } && !PERF_FORMAT_GROUP
|
||||
*
|
||||
* { u64 nr;
|
||||
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
|
||||
* { u64 time_running; } && PERF_FORMAT_RUNNING
|
||||
* { u64 value;
|
||||
* { u64 id; } && PERF_FORMAT_ID
|
||||
* } cntr[nr];
|
||||
* } && PERF_FORMAT_GROUP
|
||||
* };
|
||||
*/
|
||||
if (type & PERF_SAMPLE_READ) {
|
||||
uint64_t values[3];
|
||||
uint64_t nr;
|
||||
|
||||
if (fmt & PERF_FORMAT_GROUP) {
|
||||
ret = perf_read_buffer_64(hw, &nr);
|
||||
if (ret) {
|
||||
warnx( "cannot read nr");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sz -= sizeof(nr);
|
||||
|
||||
time_enabled = time_running = 1;
|
||||
|
||||
if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) {
|
||||
ret = perf_read_buffer_64(hw, &time_enabled);
|
||||
if (ret) {
|
||||
warnx( "cannot read timing info");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(time_enabled);
|
||||
}
|
||||
|
||||
if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) {
|
||||
ret = perf_read_buffer_64(hw, &time_running);
|
||||
if (ret) {
|
||||
warnx( "cannot read timing info");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(time_running);
|
||||
}
|
||||
|
||||
fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64" NR=%"PRIu64"\n", time_enabled, time_running, nr);
|
||||
|
||||
values[1] = time_enabled;
|
||||
values[2] = time_running;
|
||||
while(nr--) {
|
||||
grp.id = -1;
|
||||
ret = perf_read_buffer_64(hw, &grp.value);
|
||||
if (ret) {
|
||||
warnx( "cannot read group value");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(grp.value);
|
||||
|
||||
if (fmt & PERF_FORMAT_ID) {
|
||||
ret = perf_read_buffer_64(hw, &grp.id);
|
||||
if (ret) {
|
||||
warnx( "cannot read leader id");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(grp.id);
|
||||
}
|
||||
|
||||
e = perf_id2event(fds, num_fds, grp.id);
|
||||
if (e == -1)
|
||||
str = "unknown sample event";
|
||||
else
|
||||
str = fds[e].name;
|
||||
|
||||
values[0] = grp.value;
|
||||
grp.value = perf_scale(values);
|
||||
|
||||
fprintf(fp, "\t%'"PRIu64" %s (%"PRIu64"%s)\n",
|
||||
grp.value, str,
|
||||
grp.id,
|
||||
time_running != time_enabled ? ", scaled":"");
|
||||
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* this program does not use FORMAT_GROUP when there is only one event
|
||||
*/
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read value");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(val64);
|
||||
|
||||
if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) {
|
||||
ret = perf_read_buffer_64(hw, &time_enabled);
|
||||
if (ret) {
|
||||
warnx( "cannot read timing info");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(time_enabled);
|
||||
}
|
||||
|
||||
if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) {
|
||||
ret = perf_read_buffer_64(hw, &time_running);
|
||||
if (ret) {
|
||||
warnx( "cannot read timing info");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(time_running);
|
||||
}
|
||||
if (fmt & PERF_FORMAT_ID) {
|
||||
ret = perf_read_buffer_64(hw, &val64);
|
||||
if (ret) {
|
||||
warnx( "cannot read leader id");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(val64);
|
||||
}
|
||||
|
||||
fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64"\n", time_enabled, time_running);
|
||||
|
||||
values[0] = val64;
|
||||
values[1] = time_enabled;
|
||||
values[2] = time_running;
|
||||
val64 = perf_scale(values);
|
||||
|
||||
fprintf(fp, "\t%'"PRIu64" %s %s\n",
|
||||
val64, fds[0].name,
|
||||
time_running != time_enabled ? ", scaled":"");
|
||||
}
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_CALLCHAIN) {
|
||||
uint64_t nr, ip;
|
||||
|
||||
ret = perf_read_buffer_64(hw, &nr);
|
||||
if (ret) {
|
||||
warnx( "cannot read callchain nr");
|
||||
return -1;
|
||||
}
|
||||
sz -= sizeof(nr);
|
||||
|
||||
while(nr--) {
|
||||
ret = perf_read_buffer_64(hw, &ip);
|
||||
if (ret) {
|
||||
warnx( "cannot read ip");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sz -= sizeof(ip);
|
||||
|
||||
fprintf(fp, "\t0x%"PRIx64"\n", ip);
|
||||
}
|
||||
}
|
||||
|
||||
if (type & PERF_SAMPLE_RAW) {
|
||||
ret = __perf_handle_raw(hw);
|
||||
if (ret == -1)
|
||||
return -1;
|
||||
sz -= ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* if we have some data left, it is because there is more
|
||||
* than what we know about. In fact, it is more complicated
|
||||
* because we may have the right size but wrong layout. But
|
||||
* that's the best we can do.
|
||||
*/
|
||||
if (sz) {
|
||||
warnx("did not correctly parse sample leftover=%zu", sz);
|
||||
perf_skip_buffer(hw, sz);
|
||||
}
|
||||
|
||||
fputc('\n',fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp)
|
||||
{
|
||||
struct { uint64_t id, lost; } lost;
|
||||
const char *str;
|
||||
int e, ret;
|
||||
|
||||
ret = perf_read_buffer(hw, &lost, sizeof(lost));
|
||||
if (ret) {
|
||||
warnx("cannot read lost info");
|
||||
return 0;
|
||||
}
|
||||
|
||||
e = perf_id2event(fds, num_fds, lost.id);
|
||||
if (e == -1)
|
||||
str = "unknown lost event";
|
||||
else
|
||||
str = fds[e].name;
|
||||
|
||||
fprintf(fp, "<<<LOST %"PRIu64" SAMPLES FOR EVENT %s>>>\n",
|
||||
lost.lost,
|
||||
str);
|
||||
|
||||
return lost.lost;
|
||||
}
|
||||
|
||||
void
|
||||
display_exit(perf_event_desc_t *hw, FILE *fp)
|
||||
{
|
||||
struct { pid_t pid, ppid, tid, ptid; } grp;
|
||||
int ret;
|
||||
|
||||
ret = perf_read_buffer(hw, &grp, sizeof(grp));
|
||||
if (ret) {
|
||||
warnx("cannot read exit info");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(fp,"[%d] exited\n", grp.pid);
|
||||
}
|
||||
|
||||
void
|
||||
display_freq(int mode, perf_event_desc_t *hw, FILE *fp)
|
||||
{
|
||||
struct { uint64_t time, id, stream_id; } thr;
|
||||
int ret;
|
||||
|
||||
ret = perf_read_buffer(hw, &thr, sizeof(thr));
|
||||
if (ret) {
|
||||
warnx("cannot read throttling info");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(fp, "%s value=%"PRIu64" event ID=%"PRIu64"\n",
|
||||
mode ? "Throttled" : "Unthrottled",
|
||||
thr.id,
|
||||
thr.stream_id);
|
||||
}
|
184
benchmarks/opencl/spmv/perf_util.h
Normal file
184
benchmarks/opencl/spmv/perf_util.h
Normal file
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* perf_util.h - helper functions for perf_events
|
||||
*
|
||||
* Copyright (c) 2009 Google, Inc
|
||||
* Contributed by Stephane Eranian <eranian@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
||||
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
||||
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __PERF_UTIL_H__
|
||||
#define __PERF_UTIL_H__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <inttypes.h>
|
||||
//#include <err.h>
|
||||
//#include <perfmon/pfmlib_perf_event.h>
|
||||
|
||||
typedef struct {
|
||||
//struct perf_event_attr hw;
|
||||
uint64_t values[3];
|
||||
uint64_t prev_values[3];
|
||||
char *name;
|
||||
uint64_t id; /* event id kernel */
|
||||
void *buf;
|
||||
size_t pgmsk;
|
||||
int group_leader;
|
||||
int fd;
|
||||
int max_fds;
|
||||
int idx; /* opaque libpfm event identifier */
|
||||
char *fstr; /* fstr from library, must be freed */
|
||||
} perf_event_desc_t;
|
||||
|
||||
/* handy shortcut */
|
||||
#define PERF_FORMAT_SCALE (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, int *num_fds);
|
||||
extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, int *num_fds);
|
||||
extern int perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz);
|
||||
extern void perf_free_fds(perf_event_desc_t *fds, int num_fds);
|
||||
extern void perf_skip_buffer(perf_event_desc_t *hw, size_t sz);
|
||||
|
||||
extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader);
|
||||
extern int perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp);
|
||||
extern uint64_t display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp);
|
||||
extern void display_exit(perf_event_desc_t *hw, FILE *fp);
|
||||
extern void display_freq(int mode, perf_event_desc_t *hw, FILE *fp);
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
perf_read_buffer_32(perf_event_desc_t *hw, void *buf)
|
||||
{
|
||||
return perf_read_buffer(hw, buf, sizeof(uint32_t));
|
||||
}
|
||||
|
||||
static inline int
|
||||
perf_read_buffer_64(perf_event_desc_t *hw, void *buf)
|
||||
{
|
||||
return perf_read_buffer(hw, buf, sizeof(uint64_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* values[0] = raw count
|
||||
* values[1] = TIME_ENABLED
|
||||
* values[2] = TIME_RUNNING
|
||||
*/
|
||||
static inline uint64_t
|
||||
perf_scale(uint64_t *values)
|
||||
{
|
||||
uint64_t res = 0;
|
||||
|
||||
if (!values[2] && !values[1] && values[0]) {
|
||||
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
|
||||
}
|
||||
|
||||
if (values[2] > values[1]) {
|
||||
//warnx("WARNING: time_running > time_enabled: %llu vs. %llu\n", values[2], values[1]);
|
||||
}
|
||||
|
||||
if (values[2])
|
||||
res = (uint64_t)((double)values[0] * values[1]/values[2]);
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
perf_scale_valid(uint64_t *values, int* valid)
|
||||
{
|
||||
uint64_t res = 0;
|
||||
|
||||
if (!values[2] && !values[1] && values[0]) {
|
||||
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
|
||||
*valid = 0;
|
||||
}
|
||||
|
||||
if (values[2] > values[1]) {
|
||||
//warnx("WARNING: time_running > time_enabled: %llu vs. %llu\n", values[2], values[1]);
|
||||
*valid = 0;
|
||||
} else {
|
||||
*valid = 1;
|
||||
}
|
||||
|
||||
if (values[2])
|
||||
res = (uint64_t)((double)values[0] * values[1]/values[2]);
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
perf_scale_delta(uint64_t *values, uint64_t *prev_values)
|
||||
{
|
||||
uint64_t res = 0;
|
||||
|
||||
if (!values[2] && !values[1] && values[0])
|
||||
//warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n");
|
||||
|
||||
if (values[2] > values[1])
|
||||
//warnx("WARNING: time_running > time_enabled\n");
|
||||
|
||||
if (values[2] - prev_values[2])
|
||||
res = (uint64_t)((double)((values[0] - prev_values[0]) * (values[1] - prev_values[1])/ (values[2] - prev_values[2])));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* TIME_RUNNING/TIME_ENABLED
|
||||
*/
|
||||
static inline double
|
||||
perf_scale_ratio(uint64_t *values)
|
||||
{
|
||||
if (!values[1])
|
||||
return 0.0;
|
||||
|
||||
return values[2]*1.0/values[1];
|
||||
}
|
||||
|
||||
static inline int
|
||||
perf_fd2event(perf_event_desc_t *fds, int num_events, int fd)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i=0; i < num_events; i++)
|
||||
if (fds[i].fd == fd)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* id = PERF_FORMAT_ID
|
||||
*/
|
||||
static inline int
|
||||
perf_id2event(perf_event_desc_t *fds, int num_events, uint64_t id)
|
||||
{
|
||||
int j;
|
||||
for(j=0; j < num_events; j++)
|
||||
if (fds[j].id == id)
|
||||
return j;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline int
|
||||
perf_is_group_leader(perf_event_desc_t *fds, int idx)
|
||||
{
|
||||
return fds[idx].group_leader == idx;
|
||||
}
|
||||
|
||||
#endif
|
281
benchmarks/opencl/spmv/perfmon.c
Normal file
281
benchmarks/opencl/spmv/perfmon.c
Normal file
|
@ -0,0 +1,281 @@
|
|||
#define __STDC_FORMAT_MACROS
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include "perfmon.h"
|
||||
|
||||
static const char* mxpa_profile_log = "mxpa_profile_%d.log";
|
||||
static int enabled = 0;
|
||||
|
||||
// quartet's CPU supports these. Check check_events at libpfm4
|
||||
// to make this adaptable.
|
||||
static const char *gen_events_all[]={
|
||||
"snb_ep::L3_LAT_CACHE:MISS",
|
||||
"snb_ep::L3_LAT_CACHE:REFERENCE",
|
||||
|
||||
"snb_ep::L2_RQSTS:ALL_DEMAND_DATA_RD",
|
||||
"snb_ep::L2_RQSTS:ALL_DEMAND_RD_HIT",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:ACCESS",
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:MISS",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1D:PREFETCH",
|
||||
"perf::L1-DCACHE-PREFETCH-MISSES",
|
||||
|
||||
"perf::PERF_COUNT_HW_CACHE_L1I:READ",
|
||||
"perf::PERF_COUNT_HW_CACHE_L1I:MISS",
|
||||
|
||||
"perf::ITLB-LOADS",
|
||||
"perf::ITLB-LOAD-MISSES",
|
||||
"perf::DTLB-LOADS",
|
||||
"perf::DTLB-LOAD-MISSES",
|
||||
"perf::CONTEXT-SWITCHES",
|
||||
"perf::CPU-MIGRATIONS",
|
||||
"perf::CYCLES",
|
||||
"snb_ep::RESOURCE_STALLS:ANY",
|
||||
|
||||
"perf::INSTRUCTIONS",
|
||||
"perf::BRANCH-INSTRUCTIONS",
|
||||
"perf::BRANCHES",
|
||||
"perf::BRANCH-MISSES",
|
||||
NULL
|
||||
};
|
||||
|
||||
#define NUM_MAX_THREAD 256
|
||||
|
||||
static perf_event_desc_t *g_fds[NUM_MAX_THREAD];
|
||||
static int g_nthreads;
|
||||
static int num_fds = 0;
|
||||
|
||||
/* note: unsafe for multithreading */
|
||||
static uint64_t* begins;
|
||||
|
||||
static void
|
||||
fetch_counts(perf_event_desc_t *fds, int num_fds)
|
||||
{
|
||||
if (begins == 0) {
|
||||
begins = (uint64_t*) malloc(num_fds * sizeof(uint64_t));
|
||||
memset(begins, 0, num_fds * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
uint64_t val;
|
||||
uint64_t values[3];
|
||||
double ratio;
|
||||
int i;
|
||||
ssize_t ret;
|
||||
|
||||
/*
|
||||
* now read the results. We use pfp_event_count because
|
||||
* libpfm guarantees that counters for the events always
|
||||
* come first.
|
||||
*/
|
||||
memset(values, 0, sizeof(values));
|
||||
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
ret = read(fds[i].fd, values, sizeof(values));
|
||||
if (ret < (ssize_t)sizeof(values)) {
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
||||
else
|
||||
warnx("could not read event%d", i);
|
||||
}
|
||||
/*
|
||||
* scaling is systematic because we may be sharing the PMU and
|
||||
* thus may be multiplexed
|
||||
*/
|
||||
int valid = 0;
|
||||
val = perf_scale_valid(values, &valid);
|
||||
if (valid == 0) printf ("@i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
||||
ratio = perf_scale_ratio(values);
|
||||
|
||||
begins[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_counts(perf_event_desc_t *fds, int num_fds, const char *msg, FILE* fp)
|
||||
{
|
||||
uint64_t val;
|
||||
uint64_t values[3];
|
||||
double ratio;
|
||||
int i;
|
||||
ssize_t ret;
|
||||
|
||||
#if 0
|
||||
fprintf(fp, "%s ------------------------------------\n", msg);
|
||||
#else
|
||||
fprintf(fp, "method=%s", msg);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* now read the results. We use pfp_event_count because
|
||||
* libpfm guarantees that counters for the events always
|
||||
* come first.
|
||||
*/
|
||||
memset(values, 0, sizeof(values));
|
||||
|
||||
for (i = 0; i < num_fds; i++) {
|
||||
|
||||
ret = read(fds[i].fd, values, sizeof(values));
|
||||
if (ret < (ssize_t)sizeof(values)) {
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "cannot read results: %s", strerror(errno));
|
||||
else
|
||||
warnx("could not read event%d", i);
|
||||
}
|
||||
/*
|
||||
* scaling is systematic because we may be sharing the PMU and
|
||||
* thus may be multiplexed
|
||||
*/
|
||||
int valid;
|
||||
val = perf_scale_valid(values, &valid);
|
||||
if (valid == 0) printf ("!i=%d, v0=%llu, v1=%llu, v2=%llu, val=%llu\n", i, values[0], values[1], values[2], val);
|
||||
ratio = perf_scale_ratio(values);
|
||||
|
||||
#if 0
|
||||
fprintf(fp, "%s %'20"PRIu64" %s (%.2f%% scaling, raw=%'"PRIu64", ena=%'"PRIu64", run=%'"PRIu64")\n",
|
||||
"-", // msg,
|
||||
val,
|
||||
fds[i].name,
|
||||
(1.0-ratio)*100.0,
|
||||
values[0],
|
||||
values[1],
|
||||
values[2]);
|
||||
#else
|
||||
fprintf (fp, " %s=%llu", fds[i].name, val); // valid ? val : 0);
|
||||
#endif
|
||||
}
|
||||
fprintf (fp, "\n");
|
||||
}
|
||||
|
||||
FILE* open_log_file(char* fname) {
|
||||
FILE* fp;
|
||||
if (fp = fopen(fname, "r")) {
|
||||
fclose(fp);
|
||||
return fopen(fname, "a");
|
||||
}
|
||||
fp = fopen(fname, "a");
|
||||
return fp;
|
||||
}
|
||||
|
||||
void perf_init() {
|
||||
static int init = 0;
|
||||
|
||||
if (init) return;
|
||||
init = 1;
|
||||
|
||||
char* prof_envvar = getenv("MXPA_PROFILE");
|
||||
if (prof_envvar) {
|
||||
enabled = 1;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
pfm_initialize();
|
||||
}
|
||||
|
||||
static void get_tids(int* tids, int* number) {
|
||||
char path[32];
|
||||
int pid = getpid();
|
||||
sprintf (path, "/proc/%d/task", pid);
|
||||
struct dirent *de=NULL;
|
||||
DIR *d=NULL;
|
||||
d=opendir(path);
|
||||
assert(d != NULL && "Null for opendir");
|
||||
// Loop while not NULL
|
||||
char pid_str[8];
|
||||
char last[8];
|
||||
sprintf (pid_str, "%d", pid);
|
||||
int n = 0;
|
||||
while(de = readdir(d)) {
|
||||
if (!strcmp(de->d_name, ".")) continue;
|
||||
if (!strcmp(de->d_name, "..")) continue;
|
||||
if (!strcmp(de->d_name, pid_str)) continue;
|
||||
*tids++ = atoi(de->d_name);
|
||||
n++;
|
||||
}
|
||||
*number = n;
|
||||
// printf ("Sampling thread %d\n", tid);
|
||||
closedir(d);
|
||||
}
|
||||
|
||||
void perf_start(const char* kname) {
|
||||
if (!enabled) return;
|
||||
|
||||
char* prof_envvar = getenv("MXPA_PROFILE");
|
||||
|
||||
int tids[32];
|
||||
int ntid;
|
||||
get_tids(tids, &ntid);
|
||||
g_nthreads = ntid;
|
||||
|
||||
int n;
|
||||
for (n = 0; n < ntid; n++) {
|
||||
int ret;
|
||||
ret = perf_setup_list_events(prof_envvar, &(g_fds[n]), &num_fds);
|
||||
perf_event_desc_t *fds = g_fds[n];
|
||||
int cpu = -1;
|
||||
int group_fd = -1;
|
||||
int pid = tids[n];
|
||||
fds[0].fd = -1;
|
||||
int i;
|
||||
for(i=0; i < num_fds; i++) {
|
||||
fds[i].hw.read_format = PERF_FORMAT_SCALE;
|
||||
fds[i].hw.disabled = 1; /* do not start now */
|
||||
fds[i].hw.inherit = 1; /* XXX child process will inherit, when forked only? */
|
||||
|
||||
/* each event is in an independent group (multiplexing likely) */
|
||||
fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0);
|
||||
if (fds[i].fd == -1) {
|
||||
fprintf(stderr, "cannot open event %d\n", i);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
prctl(PR_TASK_PERF_EVENTS_ENABLE);
|
||||
}
|
||||
|
||||
void perf_end(const char* kname) {
|
||||
if (!enabled) return;
|
||||
int i, n;
|
||||
prctl(PR_TASK_PERF_EVENTS_DISABLE);
|
||||
|
||||
static int first_time = 1;
|
||||
if (first_time) {
|
||||
first_time = 0;
|
||||
char name[128];
|
||||
for (n = 0; n < g_nthreads; n++) {
|
||||
sprintf (name, mxpa_profile_log, n);
|
||||
FILE* fp = fopen(name, "w");
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
char name[128];
|
||||
for (n = 0; n < g_nthreads; n++) {
|
||||
sprintf (name, mxpa_profile_log, n);
|
||||
FILE* fp = open_log_file(name);
|
||||
perf_event_desc_t *fds = g_fds[n];
|
||||
print_counts(fds, num_fds, kname, fp);
|
||||
for (i = 0; i < num_fds; i++) close(fds[i].fd);
|
||||
perf_free_fds(fds, num_fds);
|
||||
g_fds[n] = fds = NULL;
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
void pin_trace_enable(char* n) {
|
||||
perf_start((const char*)n);
|
||||
}
|
||||
|
||||
void pin_trace_disable(char* n) {
|
||||
perf_end((const char*)n);
|
||||
}
|
||||
|
||||
|
20
benchmarks/opencl/spmv/perfmon.h
Normal file
20
benchmarks/opencl/spmv/perfmon.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef MXPA_RUNTIME_PERF_MONITOR
|
||||
#define MXPA_RUNTIME_PERF_MONITOR
|
||||
|
||||
#include "perf_util.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void perf_init();
|
||||
void perf_start(const char* kname);
|
||||
void perf_end(const char* kname);
|
||||
void perf_fini();
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
46
benchmarks/opencl/spmv/stub.cc
Normal file
46
benchmarks/opencl/spmv/stub.cc
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifdef __MXPA__
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <tbb/tbb.h>
|
||||
|
||||
namespace {
|
||||
tbb::task_scheduler_init init;
|
||||
|
||||
class Foo {
|
||||
public:
|
||||
Foo() {}
|
||||
void operator() (const tbb::blocked_range<size_t>& r) const {
|
||||
for (size_t i = r.begin(); i != r.end(); i++) {
|
||||
printf ("");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void
|
||||
mxpa_scheduler_init() {
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0, 100), Foo());
|
||||
#if 0
|
||||
char cmd[32];
|
||||
int pid = getpid();
|
||||
printf("----------\n");
|
||||
sprintf(cmd, "ls -1 /proc/%d/task", pid);
|
||||
system(cmd);
|
||||
printf("----------\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
extern "C"
|
||||
void
|
||||
mxpa_scheduler_init() {
|
||||
}
|
||||
|
||||
#endif
|
||||
|
BIN
benchmarks/opencl/spmv/vector.bin
Normal file
BIN
benchmarks/opencl/spmv/vector.bin
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue