mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
thread mask redesign
This commit is contained in:
parent
7b8fe11e6a
commit
e4d9fd8a00
14 changed files with 68 additions and 28 deletions
|
@ -17,8 +17,8 @@ module VX_commit #(
|
|||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
VX_writeback_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
);
|
||||
localparam CMTW = $clog2(3*`NUM_THREADS+1);
|
||||
|
||||
|
|
|
@ -223,6 +223,7 @@
|
|||
`define CSR_LWID 12'hCC3
|
||||
`define CSR_GWID `CSR_MHARTID
|
||||
`define CSR_GCID 12'hCC5
|
||||
`define CSR_TMASK 12'hCC4
|
||||
|
||||
// Machine SIMT CSRs
|
||||
`define CSR_NT 12'hFC0
|
||||
|
|
|
@ -12,6 +12,7 @@ module VX_csr_data #(
|
|||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
@ -62,15 +63,15 @@ module VX_csr_data #(
|
|||
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||
`CSR_MIE: csr_mie <= write_data;
|
||||
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||
`CSR_MIE: csr_mie <= write_data;
|
||||
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||
|
||||
`CSR_MEPC: csr_mepc <= write_data;
|
||||
`CSR_MEPC: csr_mepc <= write_data;
|
||||
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
@ -114,6 +115,9 @@ module VX_csr_data #(
|
|||
/*`CSR_MHARTID ,*/
|
||||
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
|
||||
`CSR_GCID : read_data_r = CORE_ID;
|
||||
|
||||
`CSR_TMASK : read_data_r = 32'(fetch_to_csr_if.thread_masks[read_wid]);
|
||||
|
||||
`CSR_NT : read_data_r = `NUM_THREADS;
|
||||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
|
|
|
@ -12,6 +12,7 @@ module VX_csr_unit #(
|
|||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
|
||||
|
@ -42,6 +43,7 @@ module VX_csr_unit #(
|
|||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
|
|
|
@ -12,9 +12,12 @@ module VX_execute #(
|
|||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
|
||||
// commit status
|
||||
// commit interface
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
|
@ -84,9 +87,10 @@ module VX_execute #(
|
|||
.reset (csr_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
|
|
@ -21,6 +21,10 @@ module VX_fetch #(
|
|||
// outputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
|
||||
// csr interface
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
|
||||
// busy status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
|
@ -32,13 +36,18 @@ module VX_fetch #(
|
|||
`SCOPE_BIND_VX_fetch_warp_sched
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.wstall_if (wstall_if),
|
||||
.join_if (join_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
|
||||
.ifetch_req_if (ifetch_req_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
|
||||
.fetch_to_csr_if (fetch_to_csr_if),
|
||||
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
|
|
@ -3,14 +3,10 @@
|
|||
module VX_fpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
// inputs
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
|
||||
|
|
|
@ -32,19 +32,15 @@ module VX_gpu_unit #(
|
|||
|
||||
// tmc
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign tmc.valid = is_tmc;
|
||||
assign tmc.tmask = tmc_new_mask;
|
||||
assign tmc.tmask = `NUM_THREADS'(gpu_req_if.rs1_data[gpu_req_if.tid]);
|
||||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[gpu_req_if.tid]);
|
||||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
|
@ -56,7 +52,7 @@ module VX_gpu_unit #(
|
|||
wire [`NUM_THREADS-1:0] split_else_mask;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire taken = gpu_req_if.rs1_data[i][0];
|
||||
wire taken = gpu_req_if.rs1_data[i][gpu_req_if.tid];
|
||||
assign split_then_mask[i] = gpu_req_if.tmask[i] & taken;
|
||||
assign split_else_mask[i] = gpu_req_if.tmask[i] & ~taken;
|
||||
end
|
||||
|
@ -70,7 +66,7 @@ module VX_gpu_unit #(
|
|||
// barrier
|
||||
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.id = gpu_req_if.rs1_data[gpu_req_if.tid][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);
|
||||
|
||||
// output
|
||||
|
|
|
@ -119,15 +119,15 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
|
|
@ -108,6 +108,7 @@ module VX_pipeline #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_fetch_to_csr_if fetch_to_csr_if();
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_branch_ctl_if branch_ctl_if();
|
||||
|
@ -155,6 +156,7 @@ module VX_pipeline #(
|
|||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
@ -209,7 +211,8 @@ module VX_pipeline #(
|
|||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
|
|
|
@ -16,6 +16,8 @@ module VX_warp_sched #(
|
|||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
|
||||
output wire busy
|
||||
);
|
||||
|
||||
|
@ -153,6 +155,9 @@ module VX_warp_sched #(
|
|||
end
|
||||
end
|
||||
|
||||
// export thread mask register
|
||||
assign fetch_to_csr_if.thread_masks = thread_masks;
|
||||
|
||||
// calculate active barrier status
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
|
|
12
hw/rtl/interfaces/VX_fetch_to_csr_if.v
Normal file
12
hw/rtl/interfaces/VX_fetch_to_csr_if.v
Normal file
|
@ -0,0 +1,12 @@
|
|||
`ifndef VX_FETCH_TO_CSR_IF
|
||||
`define VX_FETCH_TO_CSR_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Interface that exposes the per-warp thread masks to the CSR logic.
// Within this change set, VX_warp_sched assigns thread_masks and
// VX_csr_data reads thread_masks[read_wid] to serve CSR_TMASK reads.
interface VX_fetch_to_csr_if ();
|
||||
|
||||
// One NUM_THREADS-bit mask per warp; the unpacked dimension has NUM_WARPS entries.
wire [`NUM_THREADS-1:0] thread_masks [`NUM_WARPS-1:0];
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -12,6 +12,7 @@ interface VX_gpu_req_if();
|
|||
wire [31:0] PC;
|
||||
wire [31:0] next_PC;
|
||||
wire [`GPU_BITS-1:0] op_type;
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
|
|
@ -121,6 +121,13 @@ inline int vx_core_id() {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Return the current thread mask
|
||||
inline int vx_thread_mask() {
|
||||
int result;
|
||||
asm volatile ("csrr %0, %1" : "=r"(result) : "i"(CSR_TMASK));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Return the number of threads in a warp
|
||||
inline int vx_num_threads() {
|
||||
int result;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue