tex refactoring and bug fixes

This commit is contained in:
Blaise Tine 2021-03-16 09:25:57 -04:00
parent 17424ad554
commit 676a13f30d
21 changed files with 227 additions and 154 deletions

View file

@ -120,20 +120,12 @@ case $DRIVER in
;;
esac
case $APP in
basic)
APP_PATH=$VORTEX_HOME/driver/tests/basic
;;
demo)
APP_PATH=$VORTEX_HOME/driver/tests/demo
;;
dogfood)
APP_PATH=$VORTEX_HOME/driver/tests/dogfood
;;
*)
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
;;
esac
if [ -d "$VORTEX_HOME/driver/tests/$APP" ];
then
APP_PATH=$VORTEX_HOME/driver/tests/$APP
else
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
fi
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG"

View file

@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO

View file

@ -16,6 +16,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO

Binary file not shown.

View file

@ -141,7 +141,6 @@ int main(int argc, char *argv[]) {
kernel_arg.num_tasks = num_tasks;
kernel_arg.task_size = count;
kernel_arg.device_ptr = device;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;

View file

@ -153,7 +153,7 @@ module VX_alu_unit #(
assign mul_ready_out = !stall_out;
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op);
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
@ -164,7 +164,7 @@ module VX_alu_unit #(
`else
assign stall_in = 0;
assign stall_in = stall_out;
assign result_valid = alu_req_if.valid;
assign result_wid = alu_req_if.wid;

View file

@ -85,6 +85,10 @@
`define EXT_F_ENABLE
`endif
`ifndef EXT_TEX_DISABLE
`define EXT_TEX_ENABLE
`endif
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0

View file

@ -13,7 +13,10 @@ module VX_csr_data #(
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
`ifdef EXT_TEX_ENABLE
VX_tex_csr_if tex_csr_if,
`endif
input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
@ -80,10 +83,12 @@ module VX_csr_data #(
end
end
//write tex csrs
assign tex_csr_if.write_addr = write_addr;
assign tex_csr_if.write_data = write_data;
// TEX CSRs
`ifdef EXT_TEX_ENABLE
assign tex_csr_if.write_enable = write_enable;
assign tex_csr_if.write_addr = write_addr;
assign tex_csr_if.write_data = write_data;
`endif
always @(posedge clk) begin
if (reset) begin

View file

@ -13,8 +13,11 @@ module VX_csr_unit #(
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
`ifdef EXT_TEX_ENABLE
VX_tex_csr_if tex_csr_if,
`endif
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
@ -63,7 +66,9 @@ module VX_csr_unit #(
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
`ifdef EXT_TEX_ENABLE
.tex_csr_if (tex_csr_if),
`endif
.read_enable (csr_pipe_req_if.valid),
.read_addr (csr_pipe_req_if.addr),
.read_wid (csr_pipe_req_if.wid),

View file

@ -358,6 +358,7 @@ module VX_decode #(
use_rs2 = 1;
is_wstall = 1;
end
`ifdef EXT_TEX_ENABLE
3'h5: begin
op_type = `OP_BITS'(`GPU_TEX);
use_rd = 1;
@ -365,6 +366,7 @@ module VX_decode #(
use_rs2 = 1;
use_rs3 = 1;
end
`endif
default:;
endcase
end
@ -373,7 +375,7 @@ module VX_decode #(
end
// disable write to integer register r0
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
wire wb = use_rd && (rd_fp || (rd != 0));
// EX_ALU needs rs1=0 for LUI operation
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
@ -385,7 +387,7 @@ module VX_decode #(
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
assign decode_if.wb = use_rd_qual;
assign decode_if.wb = wb;
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd};

View file

@ -156,7 +156,6 @@
`define CSR_RW 2'h0
`define CSR_RS 2'h1
`define CSR_RC 2'h2
`define CSR_OTHER 2'h3
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
@ -185,7 +184,6 @@
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_TEX 3'h5
`define GPU_OTHER 3'h7
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]

View file

@ -45,7 +45,10 @@ module VX_execute #(
output wire ebreak
);
VX_fpu_to_csr_if fpu_to_csr_if();
`ifdef EXT_TEX_ENABLE
VX_tex_csr_if tex_csr_if();
`endif
wire[`NUM_WARPS-1:0] csr_pending;
wire[`NUM_WARPS-1:0] fpu_pending;
@ -84,7 +87,9 @@ module VX_execute #(
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
`ifdef EXT_TEX_ENABLE
.tex_csr_if (tex_csr_if),
`endif
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if),
@ -131,9 +136,11 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.gpu_req_if (gpu_req_if),
`ifdef EXT_TEX_ENABLE
.tex_csr_if (tex_csr_if),
`endif
.warp_ctl_if (warp_ctl_if),
.gpu_commit_if (gpu_commit_if),
.tex_csr_if (tex_csr_if)
.gpu_commit_if (gpu_commit_if)
);
assign ebreak = alu_req_if.valid

View file

@ -10,7 +10,10 @@ module VX_gpu_unit #(
// Inputs
VX_gpu_req_if gpu_req_if,
`ifdef EXT_TEX_ENABLE
VX_tex_csr_if tex_csr_if,
`endif
// Outputs
VX_warp_ctl_if warp_ctl_if,
@ -18,23 +21,30 @@ module VX_gpu_unit #(
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire rsp_valid;
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [`NUM_THREADS-1:0][31:0] rsp_data;
gpu_tmc_t tmc;
gpu_wspawn_t wspawn;
gpu_barrier_t barrier;
gpu_split_t split;
VX_tex_req_if tex_req_if;
VX_tex_rsp_if tex_rsp_if;
wire [(`NUM_THREADS * 32)-1:0] warp_ctl_data;
wire is_warp_ctl;
wire stall_in, stall_out;
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
// tmc
wire [`NUM_THREADS-1:0] tmc_new_mask;
@ -76,10 +86,28 @@ module VX_gpu_unit #(
assign barrier.valid = is_bar;
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
// pack warp ctl result
`IGNORE_WARNINGS_BEGIN
assign warp_ctl_data = {tmc, wspawn, barrier, split};
`IGNORE_WARNINGS_END
// texture
assign tex_req_if.valid = is_tex;
`ifdef EXT_TEX_ENABLE
VX_tex_req_if tex_req_if;
VX_tex_rsp_if tex_rsp_if;
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
assign tex_req_if.valid = gpu_req_if.valid && is_tex;
assign tex_req_if.wid = gpu_req_if.wid;
assign tex_req_if.tmask = gpu_req_if.tmask;
assign tex_req_if.PC = gpu_req_if.PC;
assign tex_req_if.rd = gpu_req_if.rd;
assign tex_req_if.wb = gpu_req_if.wb;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
@ -87,54 +115,78 @@ module VX_gpu_unit #(
assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i];
end
`UNUSED_VAR (tex_req_if.u)
`UNUSED_VAR (tex_req_if.v)
`UNUSED_VAR (tex_req_if.valid)
`UNUSED_VAR (tex_req_if.lod_t)
VX_tex_unit #(
.CORE_ID(CORE_ID)
) texture_unit (
.clk (clk),
.reset (reset),
.tex_req_if (tex_req_if),
.tex_csr_if (tex_csr_if),
.tex_rsp_if (tex_rsp_if)
.clk (clk),
.reset (reset),
.tex_req_if (tex_req_if),
.tex_csr_if (tex_csr_if),
.tex_rsp_if (tex_rsp_if)
);
assign gpu_req_if.valid = is_tex;
assign gpu_req_if.wb = tex_rsp_if.ready;
assign tex_rsp_if.ready = !stall_out;
assign stall_in = (is_tex && ~tex_req_if.ready)
|| (~is_tex && (tex_rsp_if.valid || stall_out));
assign is_warp_ctl = !(is_tex || tex_rsp_if.valid);
assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex);
assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid;
assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask;
assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC;
assign rsp_rd = tex_rsp_if.rd;
assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb;
assign rsp_data = tex_rsp_if.valid ? tex_rsp_if.data : warp_ctl_data;
`else
assign stall_in = stall_out;
assign is_warp_ctl = 1;
assign rsp_valid = gpu_req_if.valid;
assign rsp_wid = gpu_req_if.wid;
assign rsp_tmask = gpu_req_if.tmask;
assign rsp_PC = gpu_req_if.PC;
assign rsp_rd = 0;
assign rsp_wb = 0;
assign rsp_data = warp_ctl_data;
`UNUSED_VAR (gpu_req_if.rd)
`UNUSED_VAR (gpu_req_if.wb)
`endif
wire is_warp_ctl_r;
// output
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)),
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
.enable (!stall_out),
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, gpu_commit_if.data, is_warp_ctl_r})
);
assign gpu_commit_if.eop = 1'b1;
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
assign warp_ctl_if.wid = gpu_commit_if.wid;
// warp control response
`IGNORE_WARNINGS_BEGIN
assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.barrier, warp_ctl_if.split} = gpu_commit_if.data;
`IGNORE_WARNINGS_END
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r;
assign warp_ctl_if.wid = gpu_commit_if.wid;
// can accept new request?
assign gpu_req_if.ready = ~stall;
assign gpu_req_if.ready = ~stall_in;
`SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready);
`SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid);
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]);
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);

View file

@ -195,7 +195,7 @@ module VX_issue #(
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
end
if (gpu_req_if.valid && gpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data);
end
end
`endif

View file

@ -75,10 +75,10 @@
`define UP(x) (((x) > 0) ? x : 1)
`define SAFE_RNG(h,l) `MAX(h,l) : l
`define SAFE_RNG(h, l) `MAX(h,l) : l
`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-s)]
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)]
`define LTRIM(x,s) x[s-1:0]
`define LTRIM(x, s) x[s-1:0]
`endif

View file

@ -128,6 +128,7 @@ task print_ex_op (
`GPU_SPLIT: $write("SPLIT");
`GPU_JOIN: $write("JOIN");
`GPU_BAR: $write("BAR");
`GPU_TEX: $write("TEX");
default: $write("?");
endcase
end

View file

@ -4,19 +4,17 @@
`include "VX_define.vh"
interface VX_tex_req_if ();
wire valid;
wire [`NUM_THREADS-1:0][31:0] u;
wire [`NUM_THREADS-1:0][31:0] v;
wire [`NUM_THREADS-1:0][31:0] lod_t;
// wire [`NUM_THREADS-1:0][7:0] t;
// wire [`MADDRW-1:0] addr;
// wire [`MAXWTW-1:0] width;
// wire [`MAXHTW-1:0] height;
// wire [`MAXFTW-1:0] format;
// wire [`MAXFMW-1:0] filter;
// wire [`MAXAMW-1:0] clamp;
// wire [`TAGW-1:0] tag;
// wire ready;
wire valid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`NR_BITS-1:0] rd;
wire wb;
wire [`NUM_THREADS-1:0][31:0] u;
wire [`NUM_THREADS-1:0][31:0] v;
wire [`NUM_THREADS-1:0][31:0] lod_t;
wire ready;
endinterface
`endif

View file

@ -4,11 +4,18 @@
`include "VX_define.vh"
interface VX_tex_rsp_if ();
// wire valid;
// wire [`TAGW-1:0] tag;
wire [`NUM_THREADS-1:0][31:0] data;
wire ready;
wire valid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`NR_BITS-1:0] rd;
wire wb;
wire [`NUM_THREADS-1:0][31:0] data;
wire ready;
endinterface
`endif

View file

@ -6,56 +6,26 @@ module VX_tex_unit #(
) (
input wire clk,
input wire reset,
// Inputs
VX_tex_req_if tex_req_if,
VX_tex_csr_if tex_csr_if,
// Outputs
VX_tex_rsp_if tex_rsp_if
// VX_commit_if gpu_commit_if
// // Texture Request
// input wire tex_req_valid,
// input wire [`TADDRW-1:0] tex_req_u,
// input wire [`TADDRW-1:0] tex_req_v,
// input wire [`MADDRW-1:0] tex_req_addr,
// input wire [`MAXWTW-1:0] tex_req_width,
// input wire [`MAXHTW-1:0] tex_req_height,
// input wire [`MAXFTW-1:0] tex_req_format,
// input wire [`MAXFMW-1:0] tex_req_filter,
// input wire [`MAXAMW-1:0] tex_req_clamp,
// input wire [`TAGW-1:0] tex_req_tag,
// output wire tex_req_ready,
// // Texture Response
// output wire tex_rsp_valid,
// output wire [`TAGW-1:0] tex_rsp_tag,
// input wire [`DATAW-1:0] tex_rsp_data,
// input wire tex_rsp_ready,
// Cache Request
// output wire [NUMCRQS-1:0] cache_req_valids,
// output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
// input wire cache_req_ready,
// Cache Response
// input wire cache_rsp_valid,
// input wire [MADDRW-1:0] cache_rsp_addr,
// input wire [DATAW-1:0] cache_rsp_data,
// output wire cache_rsp_ready
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
`UNUSED_VAR(tex_addr)
`UNUSED_VAR(tex_format)
`UNUSED_VAR(tex_width)
`UNUSED_VAR(tex_height)
`UNUSED_VAR(tex_stride)
`UNUSED_VAR(tex_wrap_u)
`UNUSED_VAR(tex_wrap_v)
`UNUSED_VAR(tex_min_filter)
`UNUSED_VAR(tex_max_filter)
wire rsp_valid;
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [`NUM_THREADS-1:0][31:0] rsp_data;
wire stall_in, stall_out;
reg [`CSR_WIDTH-1:0] tex_addr [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_format [`NUM_TEX_UNITS-1: 0];
@ -67,44 +37,81 @@ module VX_tex_unit #(
reg [`CSR_WIDTH-1:0] tex_min_filter [`NUM_TEX_UNITS-1: 0];
reg [`CSR_WIDTH-1:0] tex_max_filter [`NUM_TEX_UNITS-1: 0];
`UNUSED_VAR (tex_addr)
`UNUSED_VAR (tex_format)
`UNUSED_VAR (tex_width)
`UNUSED_VAR (tex_height)
`UNUSED_VAR (tex_stride)
`UNUSED_VAR (tex_wrap_u)
`UNUSED_VAR (tex_wrap_v)
`UNUSED_VAR (tex_min_filter)
`UNUSED_VAR (tex_max_filter)
// tex csr programming; needs to be made consistent with `NUM_TEX_UNITS
always @(posedge clk ) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data;
`CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data;
`CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data;
`CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data;
`CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data;
`CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data;
`CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data;
`CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data;
`CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data;
`CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data;
`CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data;
`CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data;
`CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data;
`CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data;
`CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data;
`CSR_TEX0_MAX_FILTER : tex_max_filter[0] <= tex_csr_if.write_data;
`CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data;
`CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data;
`CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data;
`CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data;
`CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data;
`CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data;
`CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data;
`CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data;
`CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data;
`CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data;
`CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data;
`CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data;
`CSR_TEX1_MAX_FILTER : tex_max_filter[1] <= tex_csr_if.write_data;
default:
assert(tex_csr_if.write_addr > `CSR_TEX_END || tex_csr_if.write_addr < `CSR_TEX_BEGIN) else $error("%t: invalid CSR write address: %0h", $time, tex_csr_if.write_addr);
default:;
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign tex_rsp_if.data[i] = 32'hFAAF;
end
// texture response
`UNUSED_VAR (tex_req_if.u)
`UNUSED_VAR (tex_req_if.v)
`UNUSED_VAR (tex_req_if.lod_t)
assign tex_rsp_if.ready = 1'b1;
assign stall_in = stall_out;
`ifdef DBG_PRINT_TEX_CSRS
assign rsp_valid = tex_req_if.valid;
assign rsp_wid = tex_req_if.wid;
assign rsp_tmask = tex_req_if.tmask;
assign rsp_PC = tex_req_if.PC;
assign rsp_rd = tex_req_if.rd;
assign rsp_wb = tex_req_if.wb;
assign rsp_data = {`NUM_THREADS{32'hFAAF}}; // dummy color value
// output
assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
.data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data})
);
// can accept new request?
assign tex_req_if.ready = ~stall_in;
`ifdef DBG_PRINT_TEX
always @(posedge clk) begin
if (tex_csr_if.write_addr <= `CSR_TEX_END || tex_csr_if.write_addr >= `CSR_TEX_BEGIN) begin
// Dump the TEX CSR state only for writes whose address lies inside the
// [`CSR_TEX_BEGIN, `CSR_TEX_END] window. The two bounds must be AND-ed:
// OR-ing them matches every address, so the dump would fire on any CSR write.
if (tex_csr_if.write_enable
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN
&& tex_csr_if.write_addr <= `CSR_TEX_END)) begin
$display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]);
$display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]);
$display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]);
@ -116,7 +123,6 @@ module VX_tex_unit #(
$display("%t: core%0d-tex_csr: csr_tex0_max_filter, csr_data=%0h", $time, CORE_ID, tex_max_filter[0]);
end
end
`endif
`endif
endmodule

View file

@ -147,18 +147,12 @@
"wsched_warp_pc": "32"
},
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
"?gpu_req_fire": 1,
"gpu_req_wid": "`NW_BITS",
"gpu_req_tmask": "`NUM_THREADS",
"gpu_req_op_type": "`GPU_BITS",
"gpu_req_rs1": "32",
"gpu_req_rs2": "32",
"?gpu_rsp_valid": 1,
"gpu_rsp_wid": "`NW_BITS",
"gpu_rsp_tmc": "`GPU_TMC_SIZE",
"gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE",
"gpu_rsp_split": "`GPU_SPLIT_SIZE",
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
"gpu_rsp_barrier": "`GPU_BARRIER_SIZE"
},
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
"?dcache_req_fire":"`NUM_THREADS",

View file

@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO