mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
vx_serial runtime API
This commit is contained in:
parent
342c07f8d6
commit
47d317f17a
6 changed files with 163 additions and 46 deletions
|
@ -5,12 +5,12 @@ AR = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc-ar
|
|||
DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -fno-exceptions -fdata-sections -ffunction-sections
|
||||
CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -Wstack-usage=1024 -fno-exceptions -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I./include -I../hw
|
||||
|
||||
PROJECT = libvortexrt
|
||||
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_perf.c
|
||||
SRCS = ./src/vx_start.S ./src/vx_syscalls.c ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_spawn.S ./src/vx_perf.c
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
|
||||
|
||||
|
|
|
@ -26,11 +26,13 @@ typedef void (*pfn_workgroup_func) (
|
|||
uint32_t /* group_z */
|
||||
);
|
||||
|
||||
typedef void (*pfn_callback)(int task_id, const void *arg);
|
||||
typedef void (*pfn_callback)(int task_id, void *arg);
|
||||
|
||||
void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const void * args);
|
||||
void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, void * arg);
|
||||
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback, const void * args);
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback, void * arg);
|
||||
|
||||
void vx_serial(pfn_callback callback, void * arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <vx_print.h>
|
||||
#include <vx_spawn.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
@ -8,45 +9,32 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int __attribute__((noinline)) __vprintf(int index, int tid, const char* format, va_list va) {
|
||||
__if (index == tid) {
|
||||
return vprintf(format, va);
|
||||
}__endif
|
||||
return 0;
|
||||
/* Argument bundle handed to __printf_callback through a void pointer. */
struct printf_arg_t {
  const char *format; /* format string forwarded to vprintf */
  va_list     va;     /* variadic argument list forwarded to vprintf */
  int         ret;    /* receives the value returned by vprintf */
};
|
||||
|
||||
static void __printf_callback(int task_id, void* arg) {
|
||||
struct printf_arg_t* p_arg = (struct printf_arg_t*)(arg);
|
||||
p_arg->ret = vprintf(p_arg->format, p_arg->va);
|
||||
}
|
||||
|
||||
int vx_vprintf(const char* format, va_list va) {
|
||||
int ret = 0;
|
||||
|
||||
// need to execute single-threaded due to potential thread-data dependency
|
||||
// use manual goto loop to disable compiler optimizations affecting split/join placement
|
||||
|
||||
volatile int nt = vx_num_threads();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
for (int i = 0; i < nt; ++i) {
|
||||
ret |= __vprintf(i, tid, format, va);
|
||||
}
|
||||
|
||||
return ret;
|
||||
// need to execute 'vprintf' single-threaded due to potential thread-data dependency
|
||||
struct printf_arg_t arg;
|
||||
arg.format = format;
|
||||
arg.va = va;
|
||||
vx_serial(__printf_callback, &arg);
|
||||
return arg.ret;
|
||||
}
|
||||
|
||||
int vx_printf(const char * format, ...) {
|
||||
int ret = 0;
|
||||
|
||||
// need to execute single-threaded due to potential thread-data dependency
|
||||
// use manual goto loop to disable compiler optimizations affecting split/join placement
|
||||
|
||||
volatile int nt = vx_num_threads();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
int ret;
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
for (int i = 0; i < nt; ++i) {
|
||||
ret |= __vprintf(i, tid, format, va);
|
||||
}
|
||||
va_end(va);
|
||||
|
||||
ret = vx_vprintf(format, va);
|
||||
va_end(va);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
36
runtime/src/vx_spawn.S
Normal file
36
runtime/src/vx_spawn.S
Normal file
|
@ -0,0 +1,36 @@
|
|||
# void vx_serial(pfn_callback callback, void * arg)
#
#   a0 = callback, a1 = arg
#
# Walks index = 0 .. NT-1 (NT read from CSR 0xfc0). On each iteration the
# predicate (index != tid) is computed, a split is issued on it, and only the
# path where index == tid calls callback(index, arg); the other path branches
# straight to the join. The loop test is at the bottom, so the body runs at
# least once.
#
# Frame: six saved words (ra, s0-s4) in a 32-byte frame. The frame is padded
# from 24 to 32 bytes so that sp stays 16-byte aligned across the call to
# the callback, as the RISC-V calling convention requires.
    .type vx_serial, @function
    .global vx_serial
vx_serial:
    addi sp, sp, -32        # 24 bytes used + 8 bytes padding (16-byte alignment)
    sw   ra, 20(sp)
    sw   s4, 16(sp)
    sw   s3, 12(sp)
    sw   s2, 8(sp)
    sw   s1, 4(sp)
    sw   s0, 0(sp)
    mv   s4, a0             # callback
    mv   s3, a1             # arg
    csrr s2, 0xfc0          # NT
    csrr s1, 0xcc0          # tid
    li   s0, 0              # index
label_loop:
    sub  t0, s0, s1
    snez t0, t0             # t0 = (index != tid)
    .insn s 0x6b, 2, x0, 0(t0)  # split t0
    bnez t0, label_join     # skip the call when index != tid
    mv   a0, s0             # a0 <- index
    mv   a1, s3             # a1 <- arg
    jalr s4                 # callback(index, arg)
label_join:
    .insn s 0x6b, 3, x0, 0(x0)  # join
    addi s0, s0, 1
    blt  s0, s2, label_loop
    lw   ra, 20(sp)
    lw   s4, 16(sp)
    lw   s3, 12(sp)
    lw   s2, 8(sp)
    lw   s1, 4(sp)
    lw   s0, 0(sp)
    addi sp, sp, 32
    ret
    .size vx_serial, .-vx_serial
|
|
@ -12,7 +12,7 @@ extern "C" {
|
|||
|
||||
typedef struct {
|
||||
pfn_callback callback;
|
||||
const void * args;
|
||||
void * arg;
|
||||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
|
@ -21,7 +21,7 @@ typedef struct {
|
|||
typedef struct {
|
||||
struct context_t * ctx;
|
||||
pfn_workgroup_func wg_func;
|
||||
const void * args;
|
||||
void * arg;
|
||||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
|
@ -57,7 +57,7 @@ static void spawn_tasks_callback() {
|
|||
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
|
||||
|
||||
for (int task_id = offset, N = task_id + tK; task_id < N; ++task_id) {
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->args);
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
vx_tmc(0 == wid);
|
||||
|
@ -72,12 +72,12 @@ void spawn_remaining_tasks_callback(int nthreads) {
|
|||
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[core_id];
|
||||
|
||||
int task_id = p_wspawn_args->offset + tid;
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->args);
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback , const void * args) {
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback , void * arg) {
|
||||
// device specs
|
||||
int NC = vx_num_cores();
|
||||
int NW = vx_num_warps();
|
||||
|
@ -112,7 +112,7 @@ void vx_spawn_tasks(int num_tasks, pfn_callback callback , const void * args) {
|
|||
fW = 1;
|
||||
|
||||
//--
|
||||
wspawn_tasks_args_t wspawn_args = { callback, args, core_id * tasks_per_core, fW, rW };
|
||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
|
@ -159,7 +159,7 @@ static void spawn_kernel_callback() {
|
|||
int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
|
||||
int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
|
||||
|
||||
(p_wspawn_args->wg_func)(p_wspawn_args->args, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
(p_wspawn_args->wg_func)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
}
|
||||
|
||||
vx_tmc(0 == wid);
|
||||
|
@ -188,12 +188,12 @@ static void spawn_kernel_remaining_callback(int nthreads) {
|
|||
int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
|
||||
int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
|
||||
|
||||
(p_wspawn_args->wg_func)(p_wspawn_args->args, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
(p_wspawn_args->wg_func)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const void * args) {
|
||||
void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, void * arg) {
|
||||
// total number of WGs
|
||||
int X = ctx->num_groups[0];
|
||||
int Y = ctx->num_groups[1];
|
||||
|
@ -241,7 +241,7 @@ void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const v
|
|||
char log2X = fast_log2(X);
|
||||
|
||||
//--
|
||||
wspawn_kernel_args_t wspawn_args = { ctx, wg_func, args, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X };
|
||||
wspawn_kernel_args_t wspawn_args = { ctx, wg_func, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X };
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
|
|
91
runtime/src/vx_syscalls.c
Normal file
91
runtime/src/vx_syscalls.c
Normal file
|
@ -0,0 +1,91 @@
|
|||
#include <sys/stat.h>
|
||||
#include <newlib.h>
|
||||
#include <unistd.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_print.h>
|
||||
|
||||
/* Stub: ignores the descriptor and always reports failure (-1). */
int _close(int file) {
  (void)file;
  return -1;
}
|
||||
|
||||
/* Stub: never fills in *st and always reports failure (-1). */
int _fstat(int file, struct stat *st) {
  (void)file;
  (void)st;
  return -1;
}
|
||||
|
||||
/* Stub: returns 0 for every descriptor. */
int _isatty(int file) {
  (void)file;
  return 0;
}
|
||||
|
||||
/* Stub: ignores all arguments and always returns 0. */
int _lseek(int file, int ptr, int dir) {
  (void)file;
  (void)ptr;
  (void)dir;
  return 0;
}
|
||||
|
||||
/* Stub: no file is ever opened; always reports failure (-1). */
int _open(const char *name, int flags, int mode) {
  (void)name;
  (void)flags;
  (void)mode;
  return -1;
}
|
||||
|
||||
/* Stub: the buffer is left untouched; always reports failure (-1). */
int _read(int file, char *ptr, int len) {
  (void)file;
  (void)ptr;
  (void)len;
  return -1;
}
|
||||
|
||||
caddr_t _sbrk(int incr) { return 0; }
|
||||
|
||||
/*
 * Emits `len` bytes starting at `ptr`, one at a time, through vx_putchar.
 * The descriptor is ignored. Returns `len` unchanged; when `len` is not
 * positive nothing is emitted.
 */
int _write(int file, char *ptr, int len) {
  const char *cursor = ptr;
  int remaining = len;

  (void)file;
  while (remaining > 0) {
    vx_putchar(*cursor++);
    --remaining;
  }
  return len;
}
|
||||
|
||||
/* Stub: no signal is delivered; always reports failure (-1). */
int _kill(int pid, int sig) {
  (void)pid;
  (void)sig;
  return -1;
}
|
||||
|
||||
/* Reports the value of vx_warp_gid() as the process id. */
int _getpid() {
  const int warp_gid = vx_warp_gid();
  return warp_gid;
}
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
|
||||
/* These magic symbols are provided by the linker. */
|
||||
extern void (*__preinit_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__preinit_array_end []) (void) __attribute__((weak));
|
||||
extern void (*__init_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__init_array_end []) (void) __attribute__((weak));
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
extern void _init (void);
|
||||
#endif
|
||||
|
||||
/* Iterate over all the init routines. */
|
||||
void
|
||||
__libc_init_array (void)
|
||||
{
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
count = __preinit_array_end - __preinit_array_start;
|
||||
for (i = 0; i < count; i++)
|
||||
__preinit_array_start[i] ();
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
_init ();
|
||||
#endif
|
||||
|
||||
count = __init_array_end - __init_array_start;
|
||||
for (i = 0; i < count; i++)
|
||||
__init_array_start[i] ();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
extern void (*__fini_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__fini_array_end []) (void) __attribute__((weak));
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
extern void _fini (void);
|
||||
#endif
|
||||
|
||||
/* Run all the cleanup routines. */
|
||||
void
|
||||
__libc_fini_array (void)
|
||||
{
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
count = __fini_array_end - __fini_array_start;
|
||||
for (i = count; i > 0; i--)
|
||||
__fini_array_start[i-1] ();
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
_fini ();
|
||||
#endif
|
||||
}
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue