missing runtime changes from OPAE

This commit is contained in:
Blaise Tine 2020-03-27 22:51:54 -04:00
parent 89d5bfbef1
commit f7e0d1e491
5 changed files with 40 additions and 166 deletions

View file

@ -26,7 +26,7 @@ void vx_print_hex(unsigned f)
} }
void vx_printf(char * c, unsigned f) void vx_printf(const char * c, unsigned f)
{ {
vx_print_str(c); vx_print_str(c);
vx_print_hex(f); vx_print_hex(f);

View file

@ -9,9 +9,9 @@ extern "C" {
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}; static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
void vx_print_hex(unsigned); void vx_print_hex(unsigned);
void vx_printf(char *, unsigned); void vx_printf(const char *, unsigned);
void vx_print_str(char *); void vx_print_str(const char *);
void vx_printc(unsigned, char c); void vx_printc(unsigned, char c);

View file

@ -22,9 +22,11 @@ be:
.type vx_printc, @function .type vx_printc, @function
.global vx_printc .global vx_printc
vx_printc: vx_printc:
la t0, 0x00010000 la t0, print_addr
sw a1, 0(t0) sw a1, 0(t0)
ret ret
.section .data
print_addr:
.word 0x00010000

View file

@ -1,180 +1,73 @@
#pragma once
#include "../intrinsics/vx_intrinsics.h" #include "../intrinsics/vx_intrinsics.h"
#include "vx_api.h" #include "vx_api.h"
#include <inttypes.h> #include <inttypes.h>
#include "../config.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
func_t global_function_pointer; func_t global_function_pointer;
// void (func_t)(void *)
void * global_argument_struct; void * global_argument_struct;
unsigned global_num_threads; unsigned global_num_threads;
void setup_call()
{ void setup_call() {
vx_tmc(global_num_threads); vx_tmc(global_num_threads);
global_function_pointer(global_argument_struct); global_function_pointer(global_argument_struct);
unsigned wid = vx_warpID(); unsigned wid = vx_warpID();
if (wid != 0) if (wid != 0) {
{
vx_tmc(0); // Halt Warp Execution vx_tmc(0); // Halt Warp Execution
} } else {
else
{
vx_tmc(1); // Only activate one thread vx_tmc(1); // Only activate one thread
} }
} }
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
{
global_function_pointer = func_ptr; global_function_pointer = func_ptr;
global_argument_struct = args; global_argument_struct = args;
global_num_threads = numThreads; global_num_threads = numThreads;
vx_wspawn(numWarps, (unsigned) setup_call); vx_wspawn(numWarps, (unsigned) setup_call);
setup_call(); setup_call();
} }
unsigned pocl_threads; unsigned pocl_threads;
uint8_t * pocl_args; struct context_t * pocl_ctx;
uint8_t * pocl_ctx;
vx_pocl_workgroup_func pocl_pfn; vx_pocl_workgroup_func pocl_pfn;
const void * pocl_args;
unsigned global_z; void pocl_spawn_runonce() {
unsigned global_y;
unsigned global_x;
void pocl_spawn_real()
{
vx_tmc(pocl_threads); vx_tmc(pocl_threads);
int base_x = vx_threadID();
int base_y = vx_warpID();
int local_x; int x = vx_threadID();
int local_y; int y = vx_warpID();
for (int iter_z = 0; iter_z < global_z; iter_z++) (pocl_pfn)(pocl_args, pocl_ctx, x, y, 0);
{
for (int iter_x = 0; iter_x < global_x; iter_x++)
{
for (int iter_y = 0; iter_y < global_y; iter_y++)
{
local_x = (iter_x * TOTAL_THREADS) + base_x; if (y != 0) {
local_y = (iter_y * TOTAL_WARPS ) + base_y;
(pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z);
}
}
}
// (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
if (base_y != 0)
{
vx_tmc(0); vx_tmc(0);
} }
vx_tmc(1); vx_tmc(1);
} }
void pocl_spawn(struct context_t * ctx, vx_pocl_workgroup_func pfn, const void * args) {
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments) if (ctx->num_groups[2] > 1) {
{ printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n");
return;
// printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]);
// printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]);
// printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]);
// printf("\n\n");
// printf("ctx->local_size[0]: %d\n", ctx->local_size[0]);
// printf("ctx->local_size[1]: %d\n", ctx->local_size[1]);
// printf("ctx->local_size[2]: %d\n", ctx->local_size[2]);
if (ctx->num_groups[0] > TOTAL_THREADS)
{
pocl_threads = TOTAL_THREADS;
global_x = ctx->num_groups[0] / TOTAL_THREADS;
// printf("pocl_threads: %d\n", pocl_threads);
// printf("global_x: %d\n", global_x);
} }
else
{
pocl_threads = ctx->num_groups[0]; pocl_threads = ctx->num_groups[0];
global_x = 1; pocl_ctx = ctx;
// printf("pocl_threads: %d\n", pocl_threads); pocl_pfn = pfn;
// printf("global_x: %d\n", global_x); pocl_args = args;
if (ctx->num_groups[1] > 1) {
vx_wspawn(ctx->num_groups[1], (unsigned)&pocl_spawn_runonce);
} }
pocl_spawn_runonce();
global_z = ctx->num_groups[2];
pocl_pfn = (vx_pocl_workgroup_func) pfn;
pocl_ctx = (uint8_t *) ctx;
pocl_args = (uint8_t *) arguments;
if (ctx->num_groups[1] > 1)
{
if (ctx->num_groups[1] > TOTAL_WARPS)
{
global_y = ctx->num_groups[1] / TOTAL_WARPS;
vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", TOTAL_WARPS);
}
else
{
global_y = 1;
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", ctx->num_groups[1]);
}
}
unsigned starting_cycles = vx_getCycles();
unsigned starting_inst = vx_getInst();
pocl_spawn_real();
unsigned end_cycles = vx_getCycles();
unsigned end_inst = vx_getInst();
unsigned total_cycles = (unsigned) (end_cycles - starting_cycles);
// float total_inst = (float) (end_inst - starting_inst );
// float ipc = total_inst/total_cycles;
printf("%d\n", total_cycles);
vx_tmc(0);
// printf("pocl_spawn: Total Cycles: %d\n", );
// printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst ));
// int z;
// int y;
// int x;
// for (z = 0; z < ctx->num_groups[2]; ++z)
// {
// for (y = 0; y < ctx->num_groups[1]; ++y)
// {
// for (x = 0; x < ctx->num_groups[0]; ++x)
// {
// (use_pfn)((uint8_t *)arguments, (uint8_t *)ctx, x, y, z);
// }
// }
// }
} }
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -1,6 +1,5 @@
#ifndef VX_API_ #ifndef VX_API_
#define VX_API_ #define VX_API_
#include <inttypes.h> #include <inttypes.h>
@ -14,47 +13,27 @@ typedef void (*func_t)(void *);
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args); void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
// struct context_t {
// unsigned num_groups[3]; // use {2, 1, 1} for vecadd
// unsigned global_offset[3]; // use {0, 0, 0} for vecadd
// unsigned local_size[3]; // use {2, 1, 1} for vecadd
// unsigned char *printf_buffer; // zero for now
// unsigned *printf_buffer_position; // initialized to zero
// unsigned printf_buffer_capacity; // zero for now
// unsigned work_dim; // use 1 for vecadd
// };
struct context_t { struct context_t {
uint32_t num_groups[3]; uint32_t num_groups[3];
uint32_t global_offset[3]; uint32_t global_offset[3];
uint32_t local_size[3]; uint32_t local_size[3];
uint8_t *printf_buffer; char * printf_buffer;
uint32_t *printf_buffer_position; uint32_t *printf_buffer_position;
uint32_t printf_buffer_capacity; uint32_t printf_buffer_capacity;
uint32_t work_dim; uint32_t work_dim;
}; };
/* The default work-group function prototype as generated by Workgroup.cc. */ /* The default work-group function prototype as generated by Workgroup.cc. */
typedef void (*vx_pocl_workgroup_func) (uint8_t * /* args */, typedef void (*vx_pocl_workgroup_func) (const void * /* args */,
uint8_t * /* pocl_context */, const struct context_t * /* context */,
uint32_t /* group_x */, uint32_t /* group_x */,
uint32_t /* group_y */, uint32_t /* group_y */,
uint32_t /* group_z */); uint32_t /* group_z */);
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments); void pocl_spawn(struct context_t * ctx, vx_pocl_workgroup_func pfn, const void * args);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif