adding opencl sgemm and vecadd command line options

This commit is contained in:
Blaise Tine 2021-01-10 22:47:25 -08:00
parent 7e93d253f2
commit ea2b73d5b0
4 changed files with 88 additions and 23 deletions

View file

@ -4,6 +4,8 @@ SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= -n64
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-ase: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View file

@ -3,10 +3,10 @@
#include <assert.h>
#include <CL/opencl.h>
#include <string.h>
#include <time.h>
#include <time.h>
#include <unistd.h>
#include <chrono>
#define SIZE 32
#define KERNEL_NAME "sgemm"
#define CL_CHECK(_expr) \
@ -101,7 +101,40 @@ static void cleanup() {
if (h_c) free(h_c);
}
int size = 64;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
if (size < 2) {
printf("Error: invalid size!\n");
exit(-1);
}
}
int main (int argc, char **argv) {
// parse command arguments
parse_args(argc, argv);
cl_platform_id platform_id;
size_t kernel_size;
cl_int binary_status;
@ -120,7 +153,7 @@ int main (int argc, char **argv) {
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
// Allocate device buffers
size_t nbytes = SIZE * SIZE * sizeof(float);
size_t nbytes = size * size * sizeof(float);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
@ -140,7 +173,7 @@ int main (int argc, char **argv) {
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
// Set kernel arguments
int width = SIZE;
int width = size;
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
@ -152,7 +185,7 @@ int main (int argc, char **argv) {
h_c = (float*)malloc(nbytes);
// Initialize values for array members.
for (int i = 0; i < (SIZE * SIZE); ++i) {
for (int i = 0; i < (size * size); ++i) {
h_a[i] = (float)rand() / (float)RAND_MAX;
h_b[i] = (float)rand() / (float)RAND_MAX;
h_c[i] = 0xdeadbeef;
@ -167,7 +200,7 @@ int main (int argc, char **argv) {
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b, 0, NULL, NULL));
printf("Execute the kernel\n");
size_t global_work_size[2] = {SIZE, SIZE};
size_t global_work_size[2] = {size, size};
size_t local_work_size[2] = {1, 1};
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL));
@ -182,8 +215,8 @@ int main (int argc, char **argv) {
printf("Verify result\n");
int errors = 0;
float* h_ref = (float*)malloc(nbytes);
matmul(h_ref, h_a, h_b, SIZE, SIZE, SIZE);
for (int i = 0; i < (SIZE * SIZE); i++) {
matmul(h_ref, h_a, h_b, size, size, size);
for (int i = 0; i < (size * size); i++) {
if (!almost_equal(h_c[i], h_ref[i])) {
printf("*** error: [%d] expected=%f, actual=%f\n", i, h_ref[i], h_c[i]);
++errors;

View file

@ -4,6 +4,8 @@ SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= -n64
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-ase: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View file

@ -3,10 +3,10 @@
#include <assert.h>
#include <math.h>
#include <CL/opencl.h>
#include <unistd.h>
#include <string.h>
#include <chrono>
#define SIZE 256
#define KERNEL_NAME "vecadd"
#define CL_CHECK(_expr) \
@ -89,7 +89,35 @@ static void cleanup() {
if (h_c) free(h_c);
}
int size = 64;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
}
int main (int argc, char **argv) {
// parse command arguments
parse_args(argc, argv);
cl_platform_id platform_id;
size_t kernel_size;
cl_int binary_status;
@ -106,7 +134,7 @@ int main (int argc, char **argv) {
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
printf("Allocate device buffers\n");
size_t nbytes = SIZE * sizeof(float);
size_t nbytes = size * sizeof(float);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
@ -136,7 +164,7 @@ int main (int argc, char **argv) {
h_c = (float*)malloc(nbytes);
// Initialize values for array members.
for (int i = 0; i < SIZE; ++i) {
for (int i = 0; i < size; ++i) {
h_a[i] = sinf(i)*sinf(i);
h_b[i] = cosf(i)*cosf(i);
h_c[i] = 0xdeadbeef;
@ -151,7 +179,7 @@ int main (int argc, char **argv) {
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b, 0, NULL, NULL));
printf("Execute the kernel\n");
size_t global_work_size[1] = {SIZE};
size_t global_work_size[1] = {size};
size_t local_work_size[1] = {1};
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));
@ -165,7 +193,7 @@ int main (int argc, char **argv) {
printf("Verify result\n");
int errors = 0;
for (int i = 0; i < SIZE; ++i) {
for (int i = 0; i < size; ++i) {
float ref = h_a[i] + h_b[i];
if (!almost_equal(h_c[i], ref)) {
printf("*** error: [%d] expected=%f, actual=%f, a=%f, b=%f\n", i, ref, h_c[i], h_a[i], h_b[i]);