tex_unit update

This commit is contained in:
Blaise Tine 2021-03-30 07:01:45 -04:00
parent 28ee19779c
commit 79fcdf7a28
20 changed files with 756 additions and 626 deletions

View file

@ -9,7 +9,7 @@ struct kernel_arg_t {
uint32_t src_height;
uint32_t src_stride;
uint32_t src_pitch;
uint32_t src_ptr;
uint8_t src_ptr;
uint32_t dst_width;
uint32_t dst_height;
uint32_t dst_stride;

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

View file

@ -16,18 +16,19 @@ struct tile_arg_t {
void kernel_body(int task_id, void* arg) {
struct tile_arg_t* _arg = (struct tile_arg_t*)(arg);
uint32_t xoffset = task_id * _arg->tile_width;
uint32_t xoffset = 0;
uint32_t yoffset = task_id * _arg->tile_height;
uint32_t* dst_ptr = (uint32_t*)_arg->karg.dst_ptr + xoffset + yoffset * _arg->karg.dst_pitch;
uint8_t* dst_ptr = (uint8_t*)(_arg->karg.dst_ptr + xoffset * _arg->karg.dst_stride + yoffset * _arg->karg.dst_pitch);
float fu = xoffset * _arg->deltaX;
float fv = yoffset * _arg->deltaY;
for (uint32_t y = 0; y < _arg->tile_height; ++y) {
uint32_t* dst_row = (uint32_t*)dst_ptr;
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
int32_t u = (int32_t)(fu * (1<<20));
int32_t v = (int32_t)(fv * (1<<20));
dst_ptr[x] = vx_tex(0, u, v, 0x0);
dst_row[x] = vx_tex(0, u, v, 0x0);
fu += _arg->deltaX;
}
dst_ptr += _arg->karg.dst_pitch;

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -20,7 +20,7 @@
///////////////////////////////////////////////////////////////////////////////
const char* kernel_file = "kernel.bin";
const char* input_file = "sample.tga";
const char* input_file = "palette.tga";
const char* output_file = "output.tga";
float scale = 1.0f;
@ -69,7 +69,7 @@ void cleanup() {
}
}
int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t dst_bpp) {
int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t bpp) {
// start device
std::cout << "start device" << std::endl;
RT_CHECK(vx_start(device));
@ -83,14 +83,15 @@ int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width,
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
std::vector<uint8_t> dst_pixels(buf_size);
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < buf_size; ++i) {
dst_pixels[i] = buf_ptr[i];
}
// save output image
std::cout << "save output image" << std::endl;
RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, dst_bpp));
dump_image(dst_pixels, width, height, bpp);
RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, bpp));
return 0;
}
@ -106,6 +107,7 @@ int main(int argc, char *argv[]) {
parse_args(argc, argv);
RT_CHECK(LoadTGA(input_file, src_pixels, &src_width, &src_height, &src_bpp));
dump_image(src_pixels, src_width, src_height, src_bpp);
uint32_t src_bufsize = src_bpp * src_width * src_height;
uint32_t dst_width = (uint32_t)(src_width * scale);
@ -122,9 +124,9 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
uint32_t num_tasks = max_cores * max_warps * max_threads;
uint32_t num_tasks = max_cores * max_warps * max_threads / 4;
std::cout << "number of tasks: " << num_tasks << std::endl;
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
@ -138,8 +140,8 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
std::cout << "src_addr=" << std::hex << src_addr << std::endl;
std::cout << "dst_addr=" << std::hex << dst_addr << std::endl;
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
// allocate staging shared memory
std::cout << "allocate shared memory" << std::endl;
@ -154,13 +156,13 @@ int main(int argc, char *argv[]) {
kernel_arg.src_width = src_width;
kernel_arg.src_height = src_height;
kernel_arg.src_stride = src_bpp;
kernel_arg.src_pitch = src_bpp * src_width * src_height;
kernel_arg.src_pitch = src_bpp * src_width;
kernel_arg.src_ptr = src_addr;
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
kernel_arg.dst_pitch = dst_bpp * dst_width;
kernel_arg.dst_ptr = dst_addr;
auto buf_ptr = (int*)vx_host_ptr(buffer);

Binary file not shown.

After

Width:  |  Height:  |  Size: 1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View file

@ -1,5 +1,6 @@
#include "utils.h"
#include <fstream>
#include <assert.h>
struct __attribute__((__packed__)) tga_header_t {
int8_t idlength;
@ -108,8 +109,35 @@ int SaveTGA(const char *filename,
header.bitsperpixel = bpp * 8;
header.imagedescriptor = 0;
ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
ofs.write((const char*)pixels.data(), pixels.size());
ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
uint32_t pitch = bpp * width;
const uint8_t* pixel_bytes = pixels.data() + (height - 1) * pitch;
for (uint32_t y = 0; y < height; ++y) {
const uint8_t* pixel_row = pixel_bytes;
for (uint32_t x = 0; x < width; ++x) {
ofs.write((const char*)pixel_row, bpp);
pixel_row += bpp;
}
pixel_bytes -= pitch;
}
return 0;
}
// Prints an image to std::cout as text, one output line per image row.
//
// Each pixel is assembled from `bpp` consecutive bytes of `pixels` (first byte
// in the lowest 8 bits) and printed in hexadecimal; pixels within a row are
// separated by ", " and every row is terminated with std::endl.
//
// pixels: raw image bytes; its size must equal width * height * bpp (asserted).
// width:  number of pixels per row.
// height: number of rows.
// bpp:    bytes per pixel.
//         NOTE(review): the value is packed into 32 bits, so bpp is presumably
//         expected to be <= 4 - confirm against callers.
//
// The formatting flags of std::cout are restored before returning, so the
// hexadecimal base used here does not leak into the caller's later output.
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp) {
  assert(width * height * bpp == pixels.size());
  // std::hex is sticky; remember the caller's flags so they can be put back.
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  std::cout << std::hex;
  const uint8_t* pixel_bytes = pixels.data();
  for (uint32_t y = 0; y < height; ++y) {
    for (uint32_t x = 0; x < width; ++x) {
      // Pack the pixel's bytes, first byte into the least-significant position.
      uint32_t pixel32 = 0;
      for (uint32_t b = 0; b < bpp; ++b) {
        uint32_t pixel8 = *pixel_bytes++;
        pixel32 |= pixel8 << (b * 8);
      }
      if (x) std::cout << ", ";
      std::cout << pixel32;
    }
    std::cout << std::endl;
  }
  std::cout.flags(saved_flags);
}

View file

@ -12,4 +12,6 @@ int SaveTGA(const char *filename,
const std::vector<uint8_t> &pixels,
uint32_t width,
uint32_t height,
uint32_t bpp);
uint32_t bpp);
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp);

View file

@ -183,19 +183,46 @@ module VX_issue #(
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_req_if.valid && alu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
$write("\n");
end
if (lsu_req_if.valid && lsu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
$write(", data=");
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
$write("\n");
end
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
$write("\n");
end
if (fpu_req_if.valid && fpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
$write(", rs3_data=");
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
$write("\n");
end
if (gpu_req_if.valid && gpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
$write(", rs3_data=");
`PRINT_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
$write("\n");
end
end
`endif

View file

@ -81,4 +81,24 @@
`define LTRIM(x, s) x[s-1:0]
// Debug-print helpers built on $write; neither macro emits a newline.
//
// PRINT_ARRAY1D(a, m): writes "{", then a[m-1] down to a[0], each formatted as
// "0x%0h" and separated by ", ", then "}". The expansion already ends with a
// semicolon (and a trailing line continuation).
// NOTE(review): the trailing backslash on the last line of PRINT_ARRAY1D
// presumably relies on a blank line following it in the real file - confirm.
//
// PRINT_ARRAY2D(a, m, n): writes an outer "{", then for i = n-1 down to 0 an
// inner "{...}" group holding a[i][m-1] down to a[i][0] (same element format
// and ", " separator), then the outer "}". No separator is written between
// the inner groups. The expansion does not end with a semicolon.
`define PRINT_ARRAY1D(a, m) \
$write("{"); \
for (integer i = (m-1); i >= 0; --i) begin \
if (i != (m-1)) $write(", "); \
$write("0x%0h", a[i]); \
end \
$write("}"); \
`define PRINT_ARRAY2D(a, m, n) \
$write("{"); \
for (integer i = n-1; i >= 0; --i) begin \
$write("{"); \
for (integer j = (m-1); j >= 0; --j) begin \
if (j != (m-1)) $write(", "); \
$write("0x%0h", a[i][j]); \
end \
$write("}"); \
end \
$write("}")
`endif

View file

@ -151,4 +151,15 @@ module VX_tex_addr #(
assign ready_in = ~stall_out;
`ifdef DBG_PRINT_TEX
    // Debug trace, compiled in only when DBG_PRINT_TEX is defined: on each clock
    // edge where a memory request is accepted (valid && ready), print one line
    // with the request's wid, PC, tmask, filter, stride and its address array.
    always @(posedge clk) begin
        if (mem_req_valid && mem_req_ready) begin
            // The stride field was previously labelled "tride=" (typo).
            $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
                $time, CORE_ID, mem_req_wid, mem_req_PC, mem_req_tmask, mem_req_filter, mem_req_stride);
            `PRINT_ARRAY2D(mem_req_addr, 4, `NUM_THREADS);
            $write("\n");
        end
    end
`endif
endmodule

View file

@ -41,7 +41,6 @@ module VX_tex_bilerp #(
`UNUSED_VAR (V_lerp[63:56])
always @(*) begin
if (color_enable[3]==1'b1) //R
sampled_r[31:24] = V_lerp[55:48];

View file

@ -166,7 +166,8 @@ module VX_tex_memory #(
// Dcache Response
reg [3:0][`NUM_THREADS-1:0][31:0] rsp_texels, rsp_texels_n;
reg [`NUM_THREADS-1:0][31:0] rsp_cur_data;
wire [`NUM_THREADS-1:0][3:0][31:0] rsp_texels_qual;
reg [`NUM_THREADS-1:0][31:0] rsp_data_qual;
reg [RSP_CTR_W-1:0] rsp_rem_ctr;
wire [`NUM_THREADS-1:0] rsp_cur_tmask;
wire [RSP_CTR_W-1:0] rsp_max_cnt;
@ -187,8 +188,9 @@ module VX_tex_memory #(
assign rsp_max_cnt = $countones(q_req_tmask) * (q_req_filter ? 4 : 1);
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] src_data = (i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] src_mask = {32{dcache_rsp_if.valid[i]}};
wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask;
reg [31:0] rsp_data_shifted;
always @(*) begin
@ -199,16 +201,16 @@ module VX_tex_memory #(
always @(*) begin
case (q_req_stride)
0: rsp_cur_data[i] = 32'(rsp_data_shifted[7:0]);
1: rsp_cur_data[i] = 32'(rsp_data_shifted[15:0]);
default: rsp_cur_data[i] = rsp_data_shifted;
0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
default: rsp_data_qual[i] = rsp_data_shifted;
endcase
end
end
always @(*) begin
rsp_texels_n = rsp_texels;
rsp_texels_n[rsp_texel_idx] |= rsp_cur_data;
rsp_texels_n[rsp_texel_idx] |= rsp_data_qual;
end
always @(posedge clk) begin
@ -231,6 +233,12 @@ module VX_tex_memory #(
end
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
assign rsp_texels_qual[i][j] = rsp_texels_n[j][i];
end
end
wire stall_out = rsp_valid && ~rsp_ready;
wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt));
@ -244,8 +252,8 @@ module VX_tex_memory #(
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_n, q_req_info}),
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info})
.data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_qual, q_req_info}),
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info})
);
// Can accept new cache response?
@ -254,12 +262,22 @@ module VX_tex_memory #(
`ifdef DBG_PRINT_TEX
always @(posedge clk) begin
if ((| dcache_req_fire)) begin
$display("%t: T$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, is_dup=%b",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_addr, dcache_req_if.tag, req_texel_dup);
$write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag);
`PRINT_ARRAY1D(req_texel_addr, `NUM_THREADS);
$write(", is_dup=%b\n", req_texel_dup);
end
if (dcache_rsp_fire) begin
$display("%t: T$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, data=%0h, is_dup=%b",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_PC, dcache_rsp_if.tag, dcache_rsp_if.data, rsp_texel_dup);
$write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag);
`PRINT_ARRAY1D(rsp_data_qual, `NUM_THREADS);
$write("\n");
end
if (rsp_valid && rsp_ready) begin
$write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, filter=%0d, data=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_filter);
`PRINT_ARRAY2D(rsp_data, 4, `NUM_THREADS);
$write("\n");
end
end
`endif

View file

@ -38,7 +38,7 @@ module VX_tex_sampler #(
wire stall_out;
for (genvar i = 0; i<`NUM_THREADS ;i++ ) begin
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [3:0][63:0] formatted_data;
wire [`NUM_COLOR_CHANNEL-1:0] color_enable;
@ -57,24 +57,23 @@ module VX_tex_sampler #(
VX_tex_bilerp #(
.CORE_ID (CORE_ID)
) tex_bilerp (
.blendU(req_u[i][`BLEND_FRAC_64-1:0]), //blendU
.blendV(req_v[i][`BLEND_FRAC_64-1:0]), //blendV
.blendU (req_u[i][`BLEND_FRAC_64-1:0]),
.blendV (req_v[i][`BLEND_FRAC_64-1:0]),
.color_enable(color_enable),
.texels(formatted_data),
.sampled_data(req_data_bilerp[i])
.color_enable (color_enable),
.texels (formatted_data),
.sampled_data (req_data_bilerp[i])
);
end
for (genvar i = 0;i<`NUM_THREADS ;i++ ) begin
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign req_data[i] = (req_filter == `TEX_FILTER_BITS'(0)) ? req_texels[i][0] : req_data_bilerp[i];
end
assign stall_out = ~rsp_ready;
assign req_ready = rsp_ready;
assign stall_out = rsp_valid && ~rsp_ready;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
@ -86,4 +85,7 @@ module VX_tex_sampler #(
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data})
);
// can accept new request?
assign req_ready = ~stall_out;
endmodule

View file

@ -41,16 +41,28 @@ module VX_tex_unit #(
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
`CSR_TEX_WRAP(i) : begin
`CSR_TEX_ADDR(i) : begin
tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
end
`CSR_TEX_FORMAT(i) : begin
tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
end
`CSR_TEX_WRAP(i) : begin
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
end
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
`CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
`CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
`CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
`CSR_TEX_FILTER(i) : begin
tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
end
`CSR_TEX_MIPOFF(i) : begin
tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
end
`CSR_TEX_WIDTH(i) : begin
tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
end
`CSR_TEX_HEIGHT(i) : begin
tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
end
default:
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
@ -212,22 +224,34 @@ module VX_tex_unit #(
);
`ifdef DBG_PRINT_TEX
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
always @(posedge clk) begin
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
always @(posedge clk) begin
if (tex_csr_if.write_enable
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
$display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
$display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
$display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
$display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
$display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
$display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
$display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
$display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
$display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
$display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
$display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
$display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
$display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
end
end
end
always @(posedge clk) begin
if (tex_req_if.valid && tex_req_if.ready) begin
$display("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, u=%0h, v=%0h, lod=%0h",
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.u, tex_req_if.v, tex_req_if.lod);
end
if (tex_rsp_if.valid && tex_rsp_if.ready) begin
$write("%t: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
`PRINT_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
$write("\n");
end
end
`endif
endmodule