mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
tex refactoring and bug fixes
This commit is contained in:
parent
17424ad554
commit
676a13f30d
21 changed files with 227 additions and 154 deletions
|
@ -120,20 +120,12 @@ case $DRIVER in
|
|||
;;
|
||||
esac
|
||||
|
||||
case $APP in
|
||||
basic)
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/basic
|
||||
;;
|
||||
demo)
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/demo
|
||||
;;
|
||||
dogfood)
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/dogfood
|
||||
;;
|
||||
*)
|
||||
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
|
||||
;;
|
||||
esac
|
||||
if [ -d "$VORTEX_HOME/driver/tests/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/$APP
|
||||
else
|
||||
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
|
||||
fi
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG"
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
|
|
@ -16,6 +16,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
|
Binary file not shown.
|
@ -141,7 +141,6 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
kernel_arg.device_ptr = device;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
|
|
|
@ -153,7 +153,7 @@ module VX_alu_unit #(
|
|||
|
||||
assign mul_ready_out = !stall_out;
|
||||
|
||||
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||
|
@ -164,7 +164,7 @@ module VX_alu_unit #(
|
|||
|
||||
`else
|
||||
|
||||
assign stall_in = 0;
|
||||
assign stall_in = stall_out;
|
||||
|
||||
assign result_valid = alu_req_if.valid;
|
||||
assign result_wid = alu_req_if.wid;
|
||||
|
|
|
@ -85,6 +85,10 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef EXT_TEX_DISABLE
|
||||
`define EXT_TEX_ENABLE
|
||||
`endif
|
||||
|
||||
// Device identification
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
|
|
|
@ -13,7 +13,10 @@ module VX_csr_data #(
|
|||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
`endif
|
||||
|
||||
input wire read_enable,
|
||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||
|
@ -80,10 +83,12 @@ module VX_csr_data #(
|
|||
end
|
||||
end
|
||||
|
||||
//write tex csrs
|
||||
assign tex_csr_if.write_addr = write_addr;
|
||||
assign tex_csr_if.write_data = write_data;
|
||||
// TEX CSRs
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
assign tex_csr_if.write_enable = write_enable;
|
||||
assign tex_csr_if.write_addr = write_addr;
|
||||
assign tex_csr_if.write_data = write_data;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
|
@ -13,8 +13,11 @@ module VX_csr_unit #(
|
|||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
|
||||
`endif
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
|
@ -63,7 +66,9 @@ module VX_csr_unit #(
|
|||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
.read_addr (csr_pipe_req_if.addr),
|
||||
.read_wid (csr_pipe_req_if.wid),
|
||||
|
|
|
@ -358,6 +358,7 @@ module VX_decode #(
|
|||
use_rs2 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
use_rd = 1;
|
||||
|
@ -365,6 +366,7 @@ module VX_decode #(
|
|||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -373,7 +375,7 @@ module VX_decode #(
|
|||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
|
||||
wire wb = use_rd && (rd_fp || (rd != 0));
|
||||
|
||||
// EX_ALU needs rs1=0 for LUI operation
|
||||
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
|
||||
|
@ -385,7 +387,7 @@ module VX_decode #(
|
|||
assign decode_if.ex_type = ex_type;
|
||||
assign decode_if.op_type = op_type;
|
||||
assign decode_if.op_mod = op_mod;
|
||||
assign decode_if.wb = use_rd_qual;
|
||||
assign decode_if.wb = wb;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_if.rd = {rd_fp, rd};
|
||||
|
|
|
@ -156,7 +156,6 @@
|
|||
`define CSR_RW 2'h0
|
||||
`define CSR_RS 2'h1
|
||||
`define CSR_RC 2'h2
|
||||
`define CSR_OTHER 2'h3
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
|
@ -185,7 +184,6 @@
|
|||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_TEX 3'h5
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
||||
|
|
|
@ -45,7 +45,10 @@ module VX_execute #(
|
|||
output wire ebreak
|
||||
);
|
||||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if();
|
||||
`endif
|
||||
|
||||
wire[`NUM_WARPS-1:0] csr_pending;
|
||||
wire[`NUM_WARPS-1:0] fpu_pending;
|
||||
|
@ -84,7 +87,9 @@ module VX_execute #(
|
|||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
|
@ -131,9 +136,11 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
.tex_csr_if (tex_csr_if)
|
||||
.gpu_commit_if (gpu_commit_if)
|
||||
);
|
||||
|
||||
assign ebreak = alu_req_if.valid
|
||||
|
|
|
@ -10,7 +10,10 @@ module VX_gpu_unit #(
|
|||
|
||||
// Inputs
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
`endif
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
@ -18,23 +21,30 @@ module VX_gpu_unit #(
|
|||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire rsp_valid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
VX_tex_req_if tex_req_if;
|
||||
VX_tex_rsp_if tex_rsp_if;
|
||||
wire [(`NUM_THREADS * 32)-1:0] warp_ctl_data;
|
||||
wire is_warp_ctl;
|
||||
|
||||
wire stall_in, stall_out;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
|
||||
|
||||
|
||||
// tmc
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
|
@ -76,10 +86,28 @@ module VX_gpu_unit #(
|
|||
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
|
||||
|
||||
// pack warp ctl result
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
assign warp_ctl_data = {tmc, wspawn, barrier, split};
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// texture
|
||||
assign tex_req_if.valid = is_tex;
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
|
||||
VX_tex_req_if tex_req_if;
|
||||
VX_tex_rsp_if tex_rsp_if;
|
||||
|
||||
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
|
||||
|
||||
assign tex_req_if.valid = gpu_req_if.valid && is_tex;
|
||||
assign tex_req_if.wid = gpu_req_if.wid;
|
||||
assign tex_req_if.tmask = gpu_req_if.tmask;
|
||||
assign tex_req_if.PC = gpu_req_if.PC;
|
||||
assign tex_req_if.rd = gpu_req_if.rd;
|
||||
assign tex_req_if.wb = gpu_req_if.wb;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
|
||||
|
@ -87,54 +115,78 @@ module VX_gpu_unit #(
|
|||
assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (tex_req_if.u)
|
||||
`UNUSED_VAR (tex_req_if.v)
|
||||
`UNUSED_VAR (tex_req_if.valid)
|
||||
`UNUSED_VAR (tex_req_if.lod_t)
|
||||
|
||||
|
||||
VX_tex_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) texture_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.tex_rsp_if (tex_rsp_if)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.tex_rsp_if (tex_rsp_if)
|
||||
);
|
||||
|
||||
assign gpu_req_if.valid = is_tex;
|
||||
assign gpu_req_if.wb = tex_rsp_if.ready;
|
||||
assign tex_rsp_if.ready = !stall_out;
|
||||
|
||||
assign stall_in = (is_tex && ~tex_req_if.ready)
|
||||
|| (~is_tex && (tex_rsp_if.valid || stall_out));
|
||||
|
||||
assign is_warp_ctl = !(is_tex || tex_rsp_if.valid);
|
||||
|
||||
assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex);
|
||||
assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid;
|
||||
assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask;
|
||||
assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC;
|
||||
assign rsp_rd = tex_rsp_if.rd;
|
||||
assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb;
|
||||
assign rsp_data = tex_rsp_if.valid ? tex_rsp_if.data : warp_ctl_data;
|
||||
|
||||
`else
|
||||
|
||||
assign stall_in = stall_out;
|
||||
assign is_warp_ctl = 1;
|
||||
|
||||
assign rsp_valid = gpu_req_if.valid;
|
||||
assign rsp_wid = gpu_req_if.wid;
|
||||
assign rsp_tmask = gpu_req_if.tmask;
|
||||
assign rsp_PC = gpu_req_if.PC;
|
||||
assign rsp_rd = 0;
|
||||
assign rsp_wb = 0;
|
||||
assign rsp_data = warp_ctl_data;
|
||||
|
||||
`UNUSED_VAR (gpu_req_if.rd)
|
||||
`UNUSED_VAR (gpu_req_if.wb)
|
||||
|
||||
`endif
|
||||
|
||||
wire is_warp_ctl_r;
|
||||
|
||||
// output
|
||||
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
.enable (!stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, gpu_commit_if.data, is_warp_ctl_r})
|
||||
);
|
||||
|
||||
assign gpu_commit_if.eop = 1'b1;
|
||||
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
// warp control response
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.barrier, warp_ctl_if.split} = gpu_commit_if.data;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign gpu_req_if.ready = ~stall;
|
||||
assign gpu_req_if.ready = ~stall_in;
|
||||
|
||||
`SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready);
|
||||
`SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
|
||||
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
|
|
|
@ -195,7 +195,7 @@ module VX_issue #(
|
|||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -75,10 +75,10 @@
|
|||
|
||||
`define UP(x) (((x) > 0) ? x : 1)
|
||||
|
||||
`define SAFE_RNG(h,l) `MAX(h,l) : l
|
||||
`define SAFE_RNG(h, l) `MAX(h,l) : l
|
||||
|
||||
`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-s)]
|
||||
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)]
|
||||
|
||||
`define LTRIM(x,s) x[s-1:0]
|
||||
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
`endif
|
|
@ -128,6 +128,7 @@ task print_ex_op (
|
|||
`GPU_SPLIT: $write("SPLIT");
|
||||
`GPU_JOIN: $write("JOIN");
|
||||
`GPU_BAR: $write("BAR");
|
||||
`GPU_TEX: $write("TEX");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -4,19 +4,17 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_tex_req_if ();
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0][31:0] u;
|
||||
wire [`NUM_THREADS-1:0][31:0] v;
|
||||
wire [`NUM_THREADS-1:0][31:0] lod_t;
|
||||
// wire [`NUM_THREADS-1:0][7:0] t;
|
||||
// wire [`MADDRW-1:0] addr;
|
||||
// wire [`MAXWTW-1:0] width;
|
||||
// wire [`MAXHTW-1:0] height;
|
||||
// wire [`MAXFTW-1:0] format;
|
||||
// wire [`MAXFMW-1:0] filter;
|
||||
// wire [`MAXAMW-1:0] clamp;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
// wire ready;
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] u;
|
||||
wire [`NUM_THREADS-1:0][31:0] v;
|
||||
wire [`NUM_THREADS-1:0][31:0] lod_t;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
|
|
|
@ -4,11 +4,18 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_tex_rsp_if ();
|
||||
// wire valid;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
|
|
|
@ -6,56 +6,26 @@ module VX_tex_unit #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_tex_req_if tex_req_if,
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
|
||||
// Outputs
|
||||
VX_tex_rsp_if tex_rsp_if
|
||||
// VX_commit_if gpu_commit_if
|
||||
// // Texture Request
|
||||
// input wire tex_req_valid,
|
||||
// input wire [`TADDRW-1:0] tex_req_u,
|
||||
// input wire [`TADDRW-1:0] tex_req_v,
|
||||
// input wire [`MADDRW-1:0] tex_req_addr,
|
||||
// input wire [`MAXWTW-1:0] tex_req_width,
|
||||
// input wire [`MAXHTW-1:0] tex_req_height,
|
||||
// input wire [`MAXFTW-1:0] tex_req_format,
|
||||
// input wire [`MAXFMW-1:0] tex_req_filter,
|
||||
// input wire [`MAXAMW-1:0] tex_req_clamp,
|
||||
// input wire [`TAGW-1:0] tex_req_tag,
|
||||
// output wire tex_req_ready,
|
||||
|
||||
// // Texture Response
|
||||
// output wire tex_rsp_valid,
|
||||
// output wire [`TAGW-1:0] tex_rsp_tag,
|
||||
// input wire [`DATAW-1:0] tex_rsp_data,
|
||||
// input wire tex_rsp_ready,
|
||||
|
||||
// Cache Request
|
||||
// output wire [NUMCRQS-1:0] cache_req_valids,
|
||||
// output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
|
||||
// input wire cache_req_ready,
|
||||
|
||||
// Cache Response
|
||||
// input wire cache_rsp_valid,
|
||||
// input wire [MADDRW-1:0] cache_rsp_addr,
|
||||
// input wire [DATAW-1:0] cache_rsp_data,
|
||||
// output wire cache_rsp_ready
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`UNUSED_VAR(tex_addr)
|
||||
`UNUSED_VAR(tex_format)
|
||||
`UNUSED_VAR(tex_width)
|
||||
`UNUSED_VAR(tex_height)
|
||||
`UNUSED_VAR(tex_stride)
|
||||
`UNUSED_VAR(tex_wrap_u)
|
||||
`UNUSED_VAR(tex_wrap_v)
|
||||
`UNUSED_VAR(tex_min_filter)
|
||||
`UNUSED_VAR(tex_max_filter)
|
||||
wire rsp_valid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||
wire stall_in, stall_out;
|
||||
|
||||
reg [`CSR_WIDTH-1:0] tex_addr [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`CSR_WIDTH-1:0] tex_format [`NUM_TEX_UNITS-1: 0];
|
||||
|
@ -67,44 +37,81 @@ module VX_tex_unit #(
|
|||
reg [`CSR_WIDTH-1:0] tex_min_filter [`NUM_TEX_UNITS-1: 0];
|
||||
reg [`CSR_WIDTH-1:0] tex_max_filter [`NUM_TEX_UNITS-1: 0];
|
||||
|
||||
`UNUSED_VAR (tex_addr)
|
||||
`UNUSED_VAR (tex_format)
|
||||
`UNUSED_VAR (tex_width)
|
||||
`UNUSED_VAR (tex_height)
|
||||
`UNUSED_VAR (tex_stride)
|
||||
`UNUSED_VAR (tex_wrap_u)
|
||||
`UNUSED_VAR (tex_wrap_v)
|
||||
`UNUSED_VAR (tex_min_filter)
|
||||
`UNUSED_VAR (tex_max_filter)
|
||||
|
||||
// tex csr programming; needs to be made consistent with `NUM_TEX_UNITS
|
||||
always @(posedge clk ) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data;
|
||||
`CSR_TEX0_MAX_FILTER : tex_max_filter[0] <= tex_csr_if.write_data;
|
||||
|
||||
`CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data;
|
||||
`CSR_TEX1_MAX_FILTER : tex_max_filter[1] <= tex_csr_if.write_data;
|
||||
default:
|
||||
assert(tex_csr_if.write_addr > `CSR_TEX_END || tex_csr_if.write_addr < `CSR_TEX_BEGIN) else $error("%t: invalid CSR write address: %0h", $time, tex_csr_if.write_addr);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_rsp_if.data[i] = 32'hFAAF;
|
||||
end
|
||||
// texture response
|
||||
`UNUSED_VAR (tex_req_if.u)
|
||||
`UNUSED_VAR (tex_req_if.v)
|
||||
`UNUSED_VAR (tex_req_if.lod_t)
|
||||
|
||||
assign tex_rsp_if.ready = 1'b1;
|
||||
assign stall_in = stall_out;
|
||||
|
||||
`ifdef DBG_PRINT_TEX_CSRS
|
||||
assign rsp_valid = tex_req_if.valid;
|
||||
assign rsp_wid = tex_req_if.wid;
|
||||
assign rsp_tmask = tex_req_if.tmask;
|
||||
assign rsp_PC = tex_req_if.PC;
|
||||
assign rsp_rd = tex_req_if.rd;
|
||||
assign rsp_wb = tex_req_if.wb;
|
||||
assign rsp_data = {`NUM_THREADS{32'hFAAF}}; // dummy color value
|
||||
|
||||
// output
|
||||
assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
assign tex_req_if.ready = ~stall_in;
|
||||
|
||||
`ifdef DBG_PRINT_TEX
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_addr <= `CSR_TEX_END || tex_csr_if.write_addr >= `CSR_TEX_BEGIN) begin
|
||||
if (tex_csr_if.write_enable
|
||||
&& (tex_csr_if.write_addr <= `CSR_TEX_END
|
||||
|| tex_csr_if.write_addr >= `CSR_TEX_BEGIN)) begin
|
||||
$display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]);
|
||||
$display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]);
|
||||
|
@ -116,7 +123,6 @@ module VX_tex_unit #(
|
|||
$display("%t: core%0d-tex_csr: csr_tex0_max_filter, csr_data=%0h", $time, CORE_ID, tex_max_filter[0]);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -147,18 +147,12 @@
|
|||
"wsched_warp_pc": "32"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
|
||||
"?gpu_req_fire": 1,
|
||||
"gpu_req_wid": "`NW_BITS",
|
||||
"gpu_req_tmask": "`NUM_THREADS",
|
||||
"gpu_req_op_type": "`GPU_BITS",
|
||||
"gpu_req_rs1": "32",
|
||||
"gpu_req_rs2": "32",
|
||||
"?gpu_rsp_valid": 1,
|
||||
"gpu_rsp_wid": "`NW_BITS",
|
||||
"gpu_rsp_tmc": "`GPU_TMC_SIZE",
|
||||
"gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE",
|
||||
"gpu_rsp_split": "`GPU_SPLIT_SIZE",
|
||||
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
|
||||
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
|
||||
"?dcache_req_fire":"`NUM_THREADS",
|
||||
|
|
|
@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue