fixed wspawn's warp synchronization

This commit is contained in:
Blaise Tine 2021-08-15 05:12:27 -07:00
parent a60bfc5e01
commit bc3fa0bb23
5 changed files with 46 additions and 6 deletions

View file

@ -76,7 +76,7 @@ inline void vx_join() {
// Warp Barrier
// Emits a custom S-type instruction (opcode 0x6b, funct3 4) through the
// assembler's `.insn` directive. Operand %0 is barried_id (used as the base
// register of the memory-style operand) and %1 is num_warps.
// NOTE(review): presumably the calling warp waits until num_warps warps have
// reached barrier barried_id — confirm against the ISA description.
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
// NOTE(review): the two statements below differ only in the offset text
// ("0cd (%0)" vs "0(%0)"). This listing is a diff view without +/- markers,
// so they are presumably the removed and the added version of a single line,
// not two instructions meant to be issued back to back.
asm volatile (".insn s 0x6b, 4, %1, 0cd (%0)" :: "r"(barried_id), "r"(num_warps));
asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
}
// Return active warp's thread id

View file

@ -16,6 +16,7 @@ typedef struct {
int offset;
int N;
int R;
int NW;
} wspawn_tasks_args_t;
typedef struct {
@ -25,6 +26,7 @@ typedef struct {
int offset;
int N;
int R;
int NW;
char isXYpow2;
char isXpow2;
char log2XY;
@ -61,6 +63,9 @@ static void spawn_tasks_callback() {
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
}
// wait for all warps to complete
vx_barrier(0, p_wspawn_args->NW);
// set warp0 to single-threaded and stop other warps
vx_tmc(0 == wid);
}
@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
fW = 1;
//--
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 };
g_wspawn_args[core_id] = &wspawn_args;
//--
if (nW >= 1) {
int nw = MIN(nW, NW);
wspawn_args.NW = nw;
vx_wspawn(nw, spawn_tasks_callback);
spawn_tasks_callback();
}
@ -168,6 +174,9 @@ static void spawn_kernel_callback() {
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
}
// wait for all warps to complete
vx_barrier(0, p_wspawn_args->NW);
// set warp0 to single-threaded and stop other warps
vx_tmc(0 == wid);
}
@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
//--
wspawn_kernel_args_t wspawn_args = {
ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
};
g_wspawn_args[core_id] = &wspawn_args;
//--
if (nW >= 1) {
int nw = MIN(nW, NW);
wspawn_args.NW = nw;
vx_wspawn(nw, spawn_kernel_callback);
spawn_kernel_callback();
}

View file

@ -20,6 +20,8 @@ int main() {
errors += test_tmask();
errors += test_barrier();
if (0 == errors) {
vx_printf("Passed!\n");
} else {

View file

@ -98,7 +98,7 @@ int test_tmc() {
int wspawn_buffer[8];
void simple_kernel() {
void wspawn_kernel() {
unsigned wid = vx_warp_id();
wspawn_buffer[wid] = 65 + wid;
vx_tmc(0 == wid);
@ -107,8 +107,8 @@ void simple_kernel() {
// Warp-spawn test: hands a kernel to vx_wspawn, runs the same kernel on the
// calling warp, then passes wspawn_buffer to check_error and returns its result.
// NOTE(review): check_error is defined elsewhere; presumably it returns the
// number of mismatching entries — confirm.
int test_wsapwn() {
vx_printf("Wspawn Test\n");
// wspawn_buffer has 8 entries, so the warp count is capped at 8.
int num_warps = std::min(vx_num_warps(), 8);
// NOTE(review): the simple_kernel and wspawn_kernel call pairs below look like
// the removed and added lines of a rename shown in this diff view; only one
// pair is expected to exist in the actual file.
vx_wspawn(num_warps, simple_kernel);
simple_kernel();
vx_wspawn(num_warps, wspawn_kernel);
wspawn_kernel();
return check_error(wspawn_buffer, num_warps);
}
@ -216,4 +216,30 @@ l_start:
vx_tmc(1);
return check_error(tmask_buffer, num_threads);
}
///////////////////////////////////////////////////////////////////////////////
// Result slots for the barrier test; barrier_kernel stores 65 + wid at index wid.
int barrier_buffer[8];
// Warp count handed to vx_barrier by barrier_kernel; set by test_barrier.
volatile int barrier_ctr;
// Counter incremented inside barrier_kernel's delay loop; reset by test_barrier.
volatile int barrier_stall;
// Kernel body for the barrier test. Each warp spins for a number of steps
// that grows with its warp id, writes its marker into barrier_buffer, and
// then calls vx_barrier on barrier 0 with the warp count from barrier_ctr.
void barrier_kernel() {
  const unsigned warp = vx_warp_id();

  // Staggered delay: warp w bumps the volatile counter (w * 256 + 1) times,
  // so warps with a higher id get to the barrier call later.
  unsigned pending = warp * 256 + 1;
  while (pending-- != 0) {
    ++barrier_stall;
  }

  // Record this warp's marker value.
  barrier_buffer[warp] = 65 + warp;

  vx_barrier(0, barrier_ctr);

  // Passes 1 for warp 0 and 0 for every other warp.
  vx_tmc(0 == warp);
}
int test_barrier() {
vx_printf("Barrier Test\n");
int num_warps = std::min(vx_num_warps(), 8);
barrier_ctr = num_warps;
barrier_stall = 0;
vx_wspawn(num_warps, barrier_kernel);
barrier_kernel();
return check_error(barrier_buffer, num_warps);
}

View file

@ -17,4 +17,6 @@ int test_spawn_tasks();
int test_tmask();
int test_barrier();
#endif