mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
software update for new thread mask design
This commit is contained in:
parent
e4d9fd8a00
commit
d7948a1ce6
24 changed files with 8470 additions and 8430 deletions
|
@ -48,20 +48,20 @@ module VX_gpu_unit #(
|
|||
|
||||
// split
|
||||
|
||||
wire [`NUM_THREADS-1:0] split_then_mask;
|
||||
wire [`NUM_THREADS-1:0] split_else_mask;
|
||||
wire [`NUM_THREADS-1:0] split_then_tmask;
|
||||
wire [`NUM_THREADS-1:0] split_else_tmask;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire taken = gpu_req_if.rs1_data[i][gpu_req_if.tid];
|
||||
assign split_then_mask[i] = gpu_req_if.tmask[i] & taken;
|
||||
assign split_else_mask[i] = gpu_req_if.tmask[i] & ~taken;
|
||||
wire taken = gpu_req_if.rs1_data[i][0];
|
||||
assign split_then_tmask[i] = gpu_req_if.tmask[i] & taken;
|
||||
assign split_else_tmask[i] = gpu_req_if.tmask[i] & ~taken;
|
||||
end
|
||||
|
||||
assign split.valid = is_split;
|
||||
assign split.diverged = (| split_then_mask) && (| split_else_mask);
|
||||
assign split.then_mask = split_then_mask;
|
||||
assign split.else_mask = split_else_mask;
|
||||
assign split.pc = gpu_req_if.next_PC;
|
||||
assign split.valid = is_split;
|
||||
assign split.diverged = (| split_then_tmask) && (| split_else_tmask);
|
||||
assign split.then_tmask = split_then_tmask;
|
||||
assign split.else_tmask = split_else_tmask;
|
||||
assign split.pc = gpu_req_if.next_PC;
|
||||
|
||||
// barrier
|
||||
|
||||
|
|
|
@ -41,8 +41,8 @@ typedef struct packed {
|
|||
typedef struct packed {
|
||||
logic valid;
|
||||
logic diverged;
|
||||
logic [`NUM_THREADS-1:0] then_mask;
|
||||
logic [`NUM_THREADS-1:0] else_mask;
|
||||
logic [`NUM_THREADS-1:0] then_tmask;
|
||||
logic [`NUM_THREADS-1:0] else_tmask;
|
||||
logic [31:0] pc;
|
||||
} gpu_split_t;
|
||||
|
||||
|
|
|
@ -47,7 +47,9 @@ module VX_warp_sched #(
|
|||
reg [`NW_BITS-1:0] scheduled_warp;
|
||||
wire warp_scheduled;
|
||||
|
||||
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
|
||||
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
|
||||
|
||||
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
|
||||
|
||||
always @(*) begin
|
||||
active_warps_n = active_warps;
|
||||
|
@ -55,14 +57,14 @@ module VX_warp_sched #(
|
|||
active_warps_n = warp_ctl_if.wspawn.wmask;
|
||||
end
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
active_warps_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
active_warps_n[warp_ctl_if.wid] = tmc_active;
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
schedule_table_n = schedule_table;
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
schedule_table_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
|
||||
schedule_table_n[warp_ctl_if.wid] = tmc_active;
|
||||
end
|
||||
if (warp_scheduled) begin // remove scheduled warp (round-robin)
|
||||
schedule_table_n[scheduled_warp] = 0;
|
||||
|
@ -104,12 +106,12 @@ module VX_warp_sched #(
|
|||
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -178,6 +180,8 @@ module VX_warp_sched #(
|
|||
// split/join stack management
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_THREADS-1:0] curr_tmask = thread_masks[warp_ctl_if.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.valid
|
||||
|
@ -186,9 +190,9 @@ module VX_warp_sched #(
|
|||
|
||||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] else_mask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_mask : thread_masks[warp_ctl_if.wid];
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_mask};
|
||||
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_tmask : curr_tmask;
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, curr_tmask};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (1+32+`NUM_THREADS),
|
||||
|
|
|
@ -43,7 +43,8 @@ inline int fast_log2(int x) {
|
|||
}
|
||||
|
||||
static void spawn_tasks_callback() {
|
||||
vx_tmc(vx_num_threads());
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
|
@ -60,11 +61,13 @@ static void spawn_tasks_callback() {
|
|||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
void spawn_remaining_tasks_callback(int nthreads) {
|
||||
vx_tmc(nthreads);
|
||||
void spawn_remaining_tasks_callback(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int tid = vx_thread_gid();
|
||||
|
@ -74,6 +77,7 @@ void spawn_remaining_tasks_callback(int nthreads) {
|
|||
int task_id = p_wspawn_args->offset + tid;
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
|
@ -132,7 +136,8 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void spawn_kernel_callback() {
|
||||
vx_tmc(vx_num_threads());
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
|
@ -162,11 +167,13 @@ static void spawn_kernel_callback() {
|
|||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
}
|
||||
|
||||
// set warp0 to single-threaded and stop other warps
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
static void spawn_kernel_remaining_callback(int nthreads) {
|
||||
vx_tmc(nthreads);
|
||||
static void spawn_kernel_remaining_callback(int thread_mask) {
|
||||
// activate threads
|
||||
vx_tmc(thread_mask);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int tid = vx_thread_gid();
|
||||
|
@ -190,6 +197,7 @@ static void spawn_kernel_remaining_callback(int nthreads) {
|
|||
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ label_exit_next:
|
|||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
# activate all threads
|
||||
csrr a0, CSR_NT # get num threads
|
||||
li a0, -1
|
||||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
||||
# set global pointer register
|
||||
|
|
|
@ -267,6 +267,9 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
|||
} else if (addr == CSR_GCID) {
|
||||
// Processor coreID
|
||||
return id_;
|
||||
} else if (addr == CSR_TMASK) {
|
||||
// Thread mask of the selected warp (0 when the warp is inactive)
|
||||
return warps_.at(wid)->getTmask();
|
||||
} else if (addr == CSR_NT) {
|
||||
// Number of threads per warp
|
||||
return arch_.num_threads();
|
||||
|
|
|
@ -817,10 +817,9 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
|
|||
switch (func3) {
|
||||
case 0: {
|
||||
// TMC
|
||||
int active_threads = std::min<int>(rsdata[0], num_threads);
|
||||
tmask_.reset();
|
||||
for (int i = 0; i < active_threads; ++i) {
|
||||
tmask_[i] = true;
|
||||
for (size_t i = 0; i < tmask_.size(); ++i) {
|
||||
tmask_[i] = rsdata[0] & (1 << i);
|
||||
}
|
||||
active_ = tmask_.any();
|
||||
pipeline->stall_warp = true;
|
||||
|
|
|
@ -74,6 +74,12 @@ public:
|
|||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
Word getTmask() const {
|
||||
if (active_)
|
||||
return tmask_.to_ulong();
|
||||
return 0;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
return iRegFile_[0][reg];
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue