test sources refactoring

This commit is contained in:
Blaise Tine 2021-10-09 10:51:43 -04:00
parent 54bddeee9c
commit ca1d97a3c2
28 changed files with 261 additions and 212 deletions

View file

@ -9,6 +9,8 @@
#include "opae_sim.h"
#include <VX_config.h>
using namespace vortex;
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
if (NULL == handle || flags != 0)
return FPGA_INVALID_PARAM;

View file

@ -1,4 +1,19 @@
__kernel void psort (__global const float *in, __global float *out)
__kernel void psorti (__global const int *in, __global int *out)
{
int gid = get_global_id(0);
int n = get_global_size(0);
int ref = in[gid];
int pos = 0;
for (int i = 0; i < n; ++i) {
int cur = in[i];
pos += (cur < ref) || ((cur == ref) && (i < gid));
}
out[pos] = ref;
}
__kernel void psortf (__global const float *in, __global float *out)
{
int gid = get_global_id(0);
int n = get_global_size(0);
@ -8,7 +23,7 @@ __kernel void psort (__global const float *in, __global float *out)
int pos = 0;
for (int i = 0; i < n; ++i) {
float cur = in[i];
pos += (cur < ref) || (cur == ref && i < gid);
pos += (cur < ref) || ((cur == ref) && (i < gid));
}
out[pos] = ref;
}

View file

@ -7,7 +7,8 @@
#include <string.h>
#include <chrono>
#define KERNEL_NAME "psort"
#define KERNEL0_NAME "psorti"
#define KERNEL1_NAME "psortf"
#define CL_CHECK(_expr) \
do { \
@ -52,14 +53,6 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
return 0;
}
static bool almost_equal(float a, float b, int ulp = 4) {
union fi_t { int i; float f; };
fi_t fa, fb;
fa.f = a;
fb.f = b;
return std::abs(fa.i - fb.i) <= ulp;
}
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
@ -67,8 +60,8 @@ cl_program program = NULL;
cl_kernel kernel = NULL;
cl_mem a_memobj = NULL;
cl_mem c_memobj = NULL;
float *h_a = NULL;
float *h_c = NULL;
int *h_a = NULL;
int *h_c = NULL;
uint8_t *kernel_bin = NULL;
static void cleanup() {
@ -86,15 +79,19 @@ static void cleanup() {
}
int size = 64;
bool float_enable = false;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
printf("Usage: [-f] [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
while ((c = getopt(argc, argv, "fn:h?")) != -1) {
switch (c) {
case 'f':
float_enable = 1;
break;
case 'n':
size = atoi(optarg);
break;
@ -132,7 +129,7 @@ int main (int argc, char **argv) {
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
printf("Allocate device buffers\n");
size_t nbytes = size * sizeof(float);
size_t nbytes = size * sizeof(int);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
@ -148,21 +145,28 @@ int main (int argc, char **argv) {
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
// Create kernel
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
kernel = CL_CHECK2(clCreateKernel(program, (float_enable ? KERNEL1_NAME : KERNEL0_NAME), &_err));
// Set kernel arguments
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&c_memobj));
// Allocate memories for input arrays and output arrays.
h_a = (float*)malloc(nbytes);
h_c = (float*)malloc(nbytes);
h_a = (int*)malloc(nbytes);
h_c = (int*)malloc(nbytes);
// Initialize values for array members.
for (int i = 0; i < size; ++i) {
h_a[i] = sinf(i)*sinf(i);
h_c[i] = 0xdeadbeef;
printf("*** [%d]: h_a=%f\n", i, h_a[i]);
if (float_enable) {
float value = sinf(i)*sinf(i);
h_a[i] = *(int*)&value;
printf("*** [%d]: h_a=%f\n", i, value);
} else {
int value = size*sinf(i);
h_a[i] = value;
printf("*** [%d]: h_a=%d\n", i, value);
}
}
// Creating command queue
@ -185,17 +189,37 @@ int main (int argc, char **argv) {
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c, 0, NULL, NULL));
printf("Verify result\n");
for (int i = 0; i < size; ++i) {
int value = h_c[i];
if (float_enable) {
printf("*** [%d]: h_a=%f\n", i, *(float*)&value);
} else {
printf("*** [%d]: h_a=%d\n", i, value);
}
}
int errors = 0;
for (int i = 0; i < size; ++i) {
float ref = h_a[i];
int ref = h_a[i];
float ref_f = *(float*)&ref;
int pos = 0;
for (int j = 0; j < size; ++j) {
float cur = h_a[j];
pos += (cur < ref) || (cur == ref && j < i);
int cur = h_a[j];
if (float_enable) {
float cur_f = *(float*)&cur;
pos += (cur_f < ref_f) || (cur_f == ref_f && j < i);
} else {
pos += (cur < ref) || (cur == ref && j < i);
}
}
if (!almost_equal(h_c[pos], ref)) {
if (errors < 100)
printf("*** error: [%d] expected=%f, actual=%f\n", pos, ref, h_c[pos]);
int value = h_c[pos];
if (value != ref) {
if (errors < 100) {
if (float_enable) {
printf("*** error: [%d] expected=%f, actual=%f\n", pos, ref_f, *(float*)&value);
} else {
printf("*** error: [%d] expected=%d, actual=%d\n", pos, ref, value);
}
}
++errors;
}
}

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t count;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -3,7 +3,7 @@
#include "common.h"
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t count = arg->count;
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;

View file

@ -3,12 +3,12 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_tasks;
uint32_t task_size;
uint32_t src0_ptr;
uint32_t src1_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -3,12 +3,11 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
@ -18,6 +17,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
}

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_points;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -5,10 +5,9 @@
// Parallel Selection sort
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
int32_t* src_ptr = (int32_t*)_arg->src_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int value = src_ptr[task_id];
@ -45,6 +44,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
}

View file

@ -3,13 +3,13 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t testid;
uint32_t num_tasks;
uint32_t task_size;
uint32_t src0_ptr;
uint32_t src1_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -4,19 +4,18 @@
#include <vx_spawn.h>
#include "common.h"
typedef void (*PFN_Kernel)(int task_id, void* arg);
typedef void (*PFN_Kernel)(int task_id, const kernel_arg_t* arg);
inline float __ieee754_sqrtf (float x) {
asm ("fsqrt.s %0, %1" : "=f" (x) : "f" (x));
return x;
}
void kernel_iadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_iadd(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -27,12 +26,11 @@ void kernel_iadd(int task_id, void* arg) {
}
}
void kernel_imul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_imul(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -43,12 +41,11 @@ void kernel_imul(int task_id, void* arg) {
}
}
void kernel_idiv(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_idiv(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -59,12 +56,11 @@ void kernel_idiv(int task_id, void* arg) {
}
}
void kernel_idiv_mul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_idiv_mul(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -77,12 +73,11 @@ void kernel_idiv_mul(int task_id, void* arg) {
}
}
void kernel_fadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fadd(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -93,12 +88,11 @@ void kernel_fadd(int task_id, void* arg) {
}
}
void kernel_fsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fsub(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -109,12 +103,11 @@ void kernel_fsub(int task_id, void* arg) {
}
}
void kernel_fmul(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fmul(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -125,12 +118,11 @@ void kernel_fmul(int task_id, void* arg) {
}
}
void kernel_fmadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fmadd(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -141,12 +133,11 @@ void kernel_fmadd(int task_id, void* arg) {
}
}
void kernel_fmsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fmsub(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -157,12 +148,11 @@ void kernel_fmsub(int task_id, void* arg) {
}
}
void kernel_fnmadd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fnmadd(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -173,12 +163,11 @@ void kernel_fnmadd(int task_id, void* arg) {
}
}
void kernel_fnmsub(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fnmsub(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -189,12 +178,11 @@ void kernel_fnmsub(int task_id, void* arg) {
}
}
void kernel_fnmadd_madd(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fnmadd_madd(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -207,12 +195,11 @@ void kernel_fnmadd_madd(int task_id, void* arg) {
}
}
void kernel_fdiv(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fdiv(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -223,12 +210,11 @@ void kernel_fdiv(int task_id, void* arg) {
}
}
void kernel_fdiv2(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fdiv2(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -241,12 +227,11 @@ void kernel_fdiv2(int task_id, void* arg) {
}
}
void kernel_fsqrt(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_fsqrt(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -257,12 +242,11 @@ void kernel_fsqrt(int task_id, void* arg) {
}
}
void kernel_ftoi(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_ftoi(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -274,12 +258,11 @@ void kernel_ftoi(int task_id, void* arg) {
}
}
void kernel_ftou(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
float* src0_ptr = (float*)_arg->src0_ptr;
float* src1_ptr = (float*)_arg->src1_ptr;
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
void kernel_ftou(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
float* src0_ptr = (float*)arg->src0_ptr;
float* src1_ptr = (float*)arg->src1_ptr;
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -291,12 +274,11 @@ void kernel_ftou(int task_id, void* arg) {
}
}
void kernel_itof(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_itof(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -308,12 +290,11 @@ void kernel_itof(int task_id, void* arg) {
}
}
void kernel_utof(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_utof(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * count;
for (uint32_t i = 0; i < count; ++i) {
@ -348,6 +329,6 @@ static const PFN_Kernel sc_tests[] = {
};
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, sc_tests[arg->testid], arg);
}

View file

@ -3,12 +3,12 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_tasks;
uint32_t task_size;
uint32_t src0_ptr;
uint32_t src1_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -3,12 +3,11 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t count = _arg->task_size;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t count = arg->task_size;
int32_t* src0_ptr = (int32_t*)arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
uint32_t offset = task_id * count;
@ -20,6 +19,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
}

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_points;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -3,10 +3,9 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t* src_ptr = (uint32_t*)_arg->src_ptr;
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t* src_ptr = (uint32_t*)arg->src_ptr;
uint32_t* dst_ptr = (uint32_t*)arg->dst_ptr;
int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]);
@ -14,6 +13,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
}

View file

@ -5,13 +5,13 @@
#define NUM_LOADS 8
struct kernel_arg_t {
typedef struct {
uint32_t num_tasks;
uint32_t size;
uint32_t stride;
uint32_t addr_ptr;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -3,12 +3,11 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t stride = _arg->stride;
uint32_t* addr_ptr = (uint32_t*)_arg->addr_ptr;
float* src_ptr = (float*)_arg->src_ptr;
float* dst_ptr = (float*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t stride = arg->stride;
uint32_t* addr_ptr = (uint32_t*)arg->addr_ptr;
float* src_ptr = (float*)arg->src_ptr;
float* dst_ptr = (float*)arg->dst_ptr;
uint32_t offset = task_id * stride;
@ -24,6 +23,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
}

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t size;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -4,7 +4,7 @@
#include "common.h"
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t size = arg->size;
int32_t* src_ptr = (int32_t*)arg->src_ptr;

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t size;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -4,7 +4,7 @@
#include "common.h"
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t size = arg->size;
int32_t* src_ptr = (int32_t*)arg->src_ptr;

View file

@ -3,9 +3,9 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_points;
uint32_t src_ptr;
};
} kernel_arg_t;
#endif

View file

@ -4,13 +4,12 @@
#include <vx_spawn.h>
#include "common.h"
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
int* src_ptr = (int*)_arg->src_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
int* src_ptr = (int*)arg->src_ptr;
vx_printf("task=%d, value=%d\n", task_id, src_ptr[task_id]);
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
}

View file

@ -3,10 +3,10 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
typedef struct {
uint32_t num_points;
uint32_t src_ptr;
uint32_t dst_ptr;
};
} kernel_arg_t;
#endif

View file

@ -20,11 +20,10 @@ int __attribute__((noinline)) __smaller(int index, int tid, int32_t cur_value, i
return ret;
}
void kernel_body(int task_id, void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
uint32_t num_points = _arg->num_points;
int32_t* src_ptr = (int32_t*)_arg->src_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
void kernel_body(int task_id, const kernel_arg_t* arg) {
uint32_t num_points = arg->num_points;
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
int32_t ref_value = src_ptr[task_id];
@ -38,6 +37,6 @@ void kernel_body(int task_id, void* arg) {
}
void main() {
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
const kernel_arg_t* arg = (const kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
vx_spawn_tasks(arg->num_points, kernel_body, arg);
}

View file

@ -20,6 +20,8 @@ int main() {
errors += test_spawn_tasks();
errors += test_serial();
errors += test_tmask();
errors += test_barrier();

View file

@ -193,9 +193,8 @@ typedef struct {
int st_buffer_src[ST_BUF_SZ];
int st_buffer_dst[ST_BUF_SZ];
void st_kernel(int task_id, void * arg) {
st_args_t * arguments = (st_args_t *) arg;
arguments->dst[task_id] = arguments->src[task_id];
void st_kernel(int task_id, const st_args_t * arg) {
arg->dst[task_id] = arg->src[task_id];
}
int test_spawn_tasks() {
@ -216,6 +215,37 @@ int test_spawn_tasks() {
///////////////////////////////////////////////////////////////////////////////
#define SR_BUF_SZ 8
typedef struct {
int * buf;
} sr_args_t;
int sr_buffer[SR_BUF_SZ];
void sr_kernel(const sr_args_t * arg) {
int tid = vx_thread_id();
arg->buf[tid] = 65 + tid;
}
void __attribute__ ((noinline)) do_serial() {
sr_args_t arg;
arg.buf = sr_buffer;
vx_serial(sr_kernel, &arg);
}
int test_serial() {
vx_printf("Serial Test\n");
int num_threads = std::min(vx_num_threads(), 8);
int tmask = make_full_tmask(num_threads);
vx_tmc(tmask);
do_serial();
vx_tmc(1);
return check_error(sr_buffer, 0, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
int tmask_buffer[8];
int __attribute__ ((noinline)) do_tmask() {

View file

@ -17,6 +17,8 @@ int test_wsapwn();
int test_spawn_tasks();
int test_serial();
int test_tmask();
int test_barrier();