mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
adding predicate instruction
This commit is contained in:
parent
6caf674163
commit
c162ce526f
7 changed files with 57 additions and 32 deletions
|
@ -349,7 +349,7 @@ module VX_decode #(
|
|||
ex_type = `EX_GPU;
|
||||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(`GPU_TMC);
|
||||
op_type = `OP_BITS'(rs2 ? `GPU_PRED : `GPU_TMC);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
|
|
@ -185,7 +185,7 @@
|
|||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_PRED 3'h5
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ module VX_gpu_unit #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (gpu_req_if.op_mod)
|
||||
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
|
@ -29,11 +30,18 @@ module VX_gpu_unit #(
|
|||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_pred = (gpu_req_if.op_type == `GPU_PRED);
|
||||
|
||||
// tmc
|
||||
|
||||
assign tmc.valid = is_tmc;
|
||||
assign tmc.tmask = `NUM_THREADS'(gpu_req_if.rs1_data[gpu_req_if.tid]);
|
||||
wire [`NUM_THREADS-1:0] pred_cond;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign pred_cond[i] = gpu_req_if.tmask[i] && gpu_req_if.rs1_data[i][0];
|
||||
end
|
||||
wire [`NUM_THREADS-1:0] pred = (pred_cond != 0) ? pred_cond : gpu_req_if.tmask;
|
||||
|
||||
assign tmc.valid = is_tmc || is_pred;
|
||||
assign tmc.tmask = is_pred ? pred : `NUM_THREADS'(gpu_req_if.rs1_data[gpu_req_if.tid]);
|
||||
|
||||
// wspawn
|
||||
|
||||
|
|
|
@ -128,6 +128,7 @@ task print_ex_op (
|
|||
`GPU_SPLIT: dpi_trace("SPLIT");
|
||||
`GPU_JOIN: dpi_trace("JOIN");
|
||||
`GPU_BAR: dpi_trace("BAR");
|
||||
// Trace label for the predicate op. It needs its own case item: `GPU_BAR is
// already matched by the item above, so reusing that label here would leave
// this item unreachable and send `GPU_PRED to the default "?" label.
`GPU_PRED: dpi_trace("PRED");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -74,33 +74,33 @@ module VX_warp_sched #(
|
|||
active_warps[0] <= '1;
|
||||
thread_masks[0] <= '1;
|
||||
end else begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_masks[warp_ctl_if.barrier.id] <= 0;
|
||||
if (warp_ctl_if.valid) begin
|
||||
if (warp_ctl_if.wspawn.valid) begin
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end else begin
|
||||
barrier_masks[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
stalled_warps[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
|
||||
|
||||
if (warp_ctl_if.barrier.valid) begin
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_masks[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_masks[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.split.valid) begin
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
|
|
|
@ -53,8 +53,13 @@ extern "C" {
|
|||
})
|
||||
|
||||
// Set thread mask
|
||||
inline void vx_tmc(unsigned num_threads) {
|
||||
asm volatile (".insn s 0x6b, 0, x0, 0(%0)" :: "r"(num_threads));
|
||||
// Set the current warp's thread mask from `mask`.
// Emits one S-type custom instruction via `.insn`: opcode 0x6b, func3 0,
// x0 in the rs2 slot, and `mask` passed in a register as the base (rs1)
// operand with a zero immediate.
// NOTE(review): presumably bit i of `mask` enables thread i — confirm
// against the ISA description.
inline void vx_tmc(unsigned mask) {
|
||||
asm volatile (".insn s 0x6b, 0, x0, 0(%0)" :: "r"(mask));
|
||||
}
|
||||
|
||||
// Set thread predicate from a per-thread `condition` value.
// Emits the same S-type custom instruction as the thread-mask intrinsic
// (opcode 0x6b, func3 0) but with x1 instead of x0 in the rs2 slot;
// `condition` is passed in a register as the base (rs1) operand.
// NOTE(review): presumably the non-zero rs2 field is what selects predicate
// mode in the decoder, and threads whose condition is false are masked off
// unless no active thread passes — confirm against the hardware/simulator.
|
||||
inline void vx_pred(unsigned condition) {
|
||||
asm volatile (".insn s 0x6b, 0, x1, 0(%0)" :: "r"(condition));
|
||||
}
|
||||
|
||||
typedef void (*vx_wspawn_pfn)();
|
||||
|
|
|
@ -816,10 +816,21 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
|
|||
case GPGPU:
|
||||
switch (func3) {
|
||||
case 0: {
|
||||
// TMC
|
||||
tmask_.reset();
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
tmask_[i] = rsdata[0] & (1 << i);
|
||||
// TMC
|
||||
if (rsrc1) {
|
||||
// predicate mode
|
||||
ThreadMask pred;
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
pred[i] = tmask_[i] ? (iRegFile_[i][rsrc0] != 0) : 0;
|
||||
}
|
||||
if (pred.any()) {
|
||||
tmask_ &= pred;
|
||||
}
|
||||
} else {
|
||||
tmask_.reset();
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
tmask_[i] = rsdata[0] & (1 << i);
|
||||
}
|
||||
}
|
||||
D(3, "*** TMC " << tmask_);
|
||||
active_ = tmask_.any();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue