mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fixed wspawn's warp synchronization
This commit is contained in:
parent
a60bfc5e01
commit
bc3fa0bb23
5 changed files with 46 additions and 6 deletions
|
@ -76,7 +76,7 @@ inline void vx_join() {
|
|||
|
||||
// Warp Barrier
|
||||
// Warp barrier: emits the barrier instruction as a raw S-type encoding
// (major opcode 0x6b, funct3 4) via the assembler's `.insn` directive.
//   barried_id  barrier identifier, placed in the rs1 (base register) slot
//   num_warps   warp count handed to the barrier, placed in the rs2 slot
// The instruction is emitted exactly once, using the plain "0(%0)" offset
// form; a stray suffix on the offset (e.g. "0cd (%0)") is not a valid
// immediate and must not be reintroduced.
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
    asm volatile (".insn s 0x6b, 4, %1, 0(%0)" :: "r"(barried_id), "r"(num_warps));
}
|
||||
|
||||
// Return active warp's thread id
|
||||
|
|
|
@ -16,6 +16,7 @@ typedef struct {
|
|||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
int NW;
|
||||
} wspawn_tasks_args_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -25,6 +26,7 @@ typedef struct {
|
|||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
int NW;
|
||||
char isXYpow2;
|
||||
char isXpow2;
|
||||
char log2XY;
|
||||
|
@ -61,6 +63,9 @@ static void spawn_tasks_callback() {
|
|||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
@ -116,12 +121,13 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
|||
fW = 1;
|
||||
|
||||
//--
|
||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
|
||||
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW, 0 };
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_tasks_callback);
|
||||
spawn_tasks_callback();
|
||||
}
|
||||
|
@ -168,6 +174,9 @@ static void spawn_kernel_callback() {
|
|||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
}
|
||||
|
||||
// wait for all warps to complete
|
||||
vx_barrier(0, p_wspawn_args->NW);
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
@ -251,13 +260,14 @@ void vx_spawn_kernel(struct context_t * ctx, vx_spawn_kernel_cb callback, void *
|
|||
|
||||
//--
|
||||
wspawn_kernel_args_t wspawn_args = {
|
||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X
|
||||
ctx, callback, arg, core_id * wgs_per_core, fW, rW, 0, isXYpow2, isXpow2, log2XY, log2X
|
||||
};
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
if (nW >= 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
wspawn_args.NW = nw;
|
||||
vx_wspawn(nw, spawn_kernel_callback);
|
||||
spawn_kernel_callback();
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ int main() {
|
|||
|
||||
errors += test_tmask();
|
||||
|
||||
errors += test_barrier();
|
||||
|
||||
if (0 == errors) {
|
||||
vx_printf("Passed!\n");
|
||||
} else {
|
||||
|
|
|
@ -98,7 +98,7 @@ int test_tmc() {
|
|||
|
||||
int wspawn_buffer[8];
|
||||
|
||||
void simple_kernel() {
|
||||
void wspawn_kernel() {
|
||||
unsigned wid = vx_warp_id();
|
||||
wspawn_buffer[wid] = 65 + wid;
|
||||
vx_tmc(0 == wid);
|
||||
|
@ -107,8 +107,8 @@ void simple_kernel() {
|
|||
int test_wsapwn() {
|
||||
vx_printf("Wspawn Test\n");
|
||||
int num_warps = std::min(vx_num_warps(), 8);
|
||||
vx_wspawn(num_warps, simple_kernel);
|
||||
simple_kernel();
|
||||
vx_wspawn(num_warps, wspawn_kernel);
|
||||
wspawn_kernel();
|
||||
|
||||
return check_error(wspawn_buffer, num_warps);
|
||||
}
|
||||
|
@ -216,4 +216,30 @@ l_start:
|
|||
vx_tmc(1);
|
||||
|
||||
return check_error(tmask_buffer, num_threads);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Per-warp result slots for the barrier test: barrier_kernel stores
// 65 + warp id at index [warp id]; test_barrier hands the array to check_error.
int barrier_buffer[8];
|
||||
// Warp count passed to vx_barrier by barrier_kernel; test_barrier sets it
// before spawning the warps.
volatile int barrier_ctr;
|
||||
// Counter incremented inside barrier_kernel's delay loop; test_barrier resets
// it to 0 before each run.
volatile int barrier_stall;
|
||||
|
||||
void barrier_kernel() {
|
||||
unsigned wid = vx_warp_id();
|
||||
for (int i = 0; i <= (wid * 256); ++i) {
|
||||
++barrier_stall;
|
||||
}
|
||||
barrier_buffer[wid] = 65 + wid;
|
||||
vx_barrier(0, barrier_ctr);
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
int test_barrier() {
|
||||
vx_printf("Barrier Test\n");
|
||||
int num_warps = std::min(vx_num_warps(), 8);
|
||||
barrier_ctr = num_warps;
|
||||
barrier_stall = 0;
|
||||
vx_wspawn(num_warps, barrier_kernel);
|
||||
barrier_kernel();
|
||||
return check_error(barrier_buffer, num_warps);
|
||||
}
|
|
@ -17,4 +17,6 @@ int test_spawn_tasks();
|
|||
|
||||
int test_tmask();
|
||||
|
||||
int test_barrier();
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue