mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
tex_unit update
This commit is contained in:
parent
28ee19779c
commit
79fcdf7a28
20 changed files with 756 additions and 626 deletions
|
@ -9,7 +9,7 @@ struct kernel_arg_t {
|
|||
uint32_t src_height;
|
||||
uint32_t src_stride;
|
||||
uint32_t src_pitch;
|
||||
uint32_t src_ptr;
|
||||
uint8_t src_ptr;
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
uint32_t dst_stride;
|
||||
|
|
Binary file not shown.
BIN
driver/tests/tex_demo/football.tga
Normal file
BIN
driver/tests/tex_demo/football.tga
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
Binary file not shown.
|
@ -16,18 +16,19 @@ struct tile_arg_t {
|
|||
void kernel_body(int task_id, void* arg) {
|
||||
struct tile_arg_t* _arg = (struct tile_arg_t*)(arg);
|
||||
|
||||
uint32_t xoffset = task_id * _arg->tile_width;
|
||||
uint32_t xoffset = 0;
|
||||
uint32_t yoffset = task_id * _arg->tile_height;
|
||||
uint32_t* dst_ptr = (uint32_t*)_arg->karg.dst_ptr + xoffset + yoffset * _arg->karg.dst_pitch;
|
||||
uint8_t* dst_ptr = (uint8_t*)(_arg->karg.dst_ptr + xoffset * _arg->karg.dst_stride + yoffset * _arg->karg.dst_pitch);
|
||||
|
||||
float fu = xoffset * _arg->deltaX;
|
||||
float fv = yoffset * _arg->deltaY;
|
||||
|
||||
for (uint32_t y = 0; y < _arg->tile_height; ++y) {
|
||||
uint32_t* dst_row = (uint32_t*)dst_ptr;
|
||||
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
|
||||
int32_t u = (int32_t)(fu * (1<<20));
|
||||
int32_t v = (int32_t)(fv * (1<<20));
|
||||
dst_ptr[x] = vx_tex(0, u, v, 0x0);
|
||||
dst_row[x] = vx_tex(0, u, v, 0x0);
|
||||
fu += _arg->deltaX;
|
||||
}
|
||||
dst_ptr += _arg->karg.dst_pitch;
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -20,7 +20,7 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
const char* input_file = "sample.tga";
|
||||
const char* input_file = "palette.tga";
|
||||
const char* output_file = "output.tga";
|
||||
float scale = 1.0f;
|
||||
|
||||
|
@ -69,7 +69,7 @@ void cleanup() {
|
|||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t dst_bpp) {
|
||||
int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width, uint32_t height, uint32_t bpp) {
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
@ -83,14 +83,15 @@ int run_test(const kernel_arg_t& kernel_arg, uint32_t buf_size, uint32_t width,
|
|||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
std::vector<uint8_t> dst_pixels(buf_size);
|
||||
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < buf_size; ++i) {
|
||||
dst_pixels[i] = buf_ptr[i];
|
||||
}
|
||||
|
||||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, dst_bpp));
|
||||
dump_image(dst_pixels, width, height, bpp);
|
||||
RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, bpp));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -106,6 +107,7 @@ int main(int argc, char *argv[]) {
|
|||
parse_args(argc, argv);
|
||||
|
||||
RT_CHECK(LoadTGA(input_file, src_pixels, &src_width, &src_height, &src_bpp));
|
||||
dump_image(src_pixels, src_width, src_height, src_bpp);
|
||||
uint32_t src_bufsize = src_bpp * src_width * src_height;
|
||||
|
||||
uint32_t dst_width = (uint32_t)(src_width * scale);
|
||||
|
@ -122,9 +124,9 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads / 4;
|
||||
|
||||
std::cout << "number of tasks: " << num_tasks << std::endl;
|
||||
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
|
||||
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
|
||||
std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
|
||||
|
||||
|
@ -138,8 +140,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
||||
|
||||
std::cout << "src_addr=" << std::hex << src_addr << std::endl;
|
||||
std::cout << "dst_addr=" << std::hex << dst_addr << std::endl;
|
||||
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
|
||||
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
||||
|
||||
// allocate staging shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
|
@ -154,13 +156,13 @@ int main(int argc, char *argv[]) {
|
|||
kernel_arg.src_width = src_width;
|
||||
kernel_arg.src_height = src_height;
|
||||
kernel_arg.src_stride = src_bpp;
|
||||
kernel_arg.src_pitch = src_bpp * src_width * src_height;
|
||||
kernel_arg.src_pitch = src_bpp * src_width;
|
||||
kernel_arg.src_ptr = src_addr;
|
||||
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = dst_bpp;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width * dst_height;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width;
|
||||
kernel_arg.dst_ptr = dst_addr;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
|
|
BIN
driver/tests/tex_demo/palette.tga
Normal file
BIN
driver/tests/tex_demo/palette.tga
Normal file
Binary file not shown.
After Width: | Height: | Size: 1 KiB |
Binary file not shown.
Before Width: | Height: | Size: 192 KiB |
BIN
driver/tests/tex_demo/toad.tga
Normal file
BIN
driver/tests/tex_demo/toad.tga
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
|
@ -1,5 +1,6 @@
|
|||
#include "utils.h"
|
||||
#include <fstream>
|
||||
#include <assert.h>
|
||||
|
||||
struct __attribute__((__packed__)) tga_header_t {
|
||||
int8_t idlength;
|
||||
|
@ -108,8 +109,35 @@ int SaveTGA(const char *filename,
|
|||
header.bitsperpixel = bpp * 8;
|
||||
header.imagedescriptor = 0;
|
||||
|
||||
ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
|
||||
ofs.write((const char*)pixels.data(), pixels.size());
|
||||
ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
|
||||
|
||||
uint32_t pitch = bpp * width;
|
||||
const uint8_t* pixel_bytes = pixels.data() + (height - 1) * pitch;
|
||||
for (uint32_t y = 0; y < height; ++y) {
|
||||
const uint8_t* pixel_row = pixel_bytes;
|
||||
for (uint32_t x = 0; x < width; ++x) {
|
||||
ofs.write((const char*)pixel_row, bpp);
|
||||
pixel_row += bpp;
|
||||
}
|
||||
pixel_bytes -= pitch;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Prints an image to std::cout as hexadecimal pixel values: one text line per
// image row, with the pixels of a row separated by ", ".
//
// pixels : pixel bytes, read sequentially row after row; its size must equal
//          width * height * bpp (checked with assert).
// width  : number of pixels per row.
// height : number of rows.
// bpp    : bytes per pixel. Byte b of a pixel is placed in bits
//          [8*b, 8*b+7] of the printed value (first byte is least significant).
//          Only the first four bytes fit in the printed 32-bit value.
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp) {
  // Do the size check in size_t so large images cannot wrap a 32-bit product.
  assert(static_cast<size_t>(width) * height * bpp == pixels.size());

  // std::hex is sticky: save the caller's formatting and restore it on exit so
  // that later decimal output on std::cout is not silently printed in hex.
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  std::cout << std::hex;

  const uint8_t* pixel_bytes = pixels.data();
  for (uint32_t y = 0; y < height; ++y) {
    for (uint32_t x = 0; x < width; ++x) {
      uint32_t pixel32 = 0;
      for (uint32_t b = 0; b < bpp; ++b) {
        const uint32_t pixel8 = *pixel_bytes++;
        // Shifting a 32-bit value by 32 or more is undefined; bytes beyond
        // the fourth are consumed but cannot be represented.
        if (b < sizeof(pixel32)) {
          pixel32 |= pixel8 << (b * 8);
        }
      }
      if (x) std::cout << ", ";
      std::cout << pixel32;
    }
    std::cout << std::endl;
  }

  std::cout.flags(saved_flags);
}
|
|
@ -12,4 +12,6 @@ int SaveTGA(const char *filename,
|
|||
const std::vector<uint8_t> &pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp);
|
||||
uint32_t bpp);
|
||||
|
||||
// Prints the image to std::cout in hexadecimal: one text line per row, with
// the pixels of a row separated by ", ". The bpp bytes of each pixel are
// combined into one value with the first byte as the least-significant one.
// Asserts that width * height * bpp equals pixels.size().
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp);
|
|
@ -183,19 +183,46 @@ module VX_issue #(
|
|||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
|
||||
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
|
||||
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||
$write(", data=");
|
||||
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
|
||||
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write(", rs3_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write(", rs3_data=");
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -81,4 +81,24 @@
|
|||
|
||||
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
// Writes the `m` elements of array `a` as "{0x<a[m-1]>, ..., 0x<a[0]>}" using
// $write, i.e. highest index first and without a trailing newline.
// Expands to several statements, so invoke it inside a begin/end block.
// The macro body keeps its own final ';' so existing call sites behave the
// same; the last line deliberately has no '\' so the line that follows the
// definition can never be spliced into the macro.
`define PRINT_ARRAY1D(a, m) \
    $write("{"); \
    for (integer i = ((m)-1); i >= 0; --i) begin \
        if (i != ((m)-1)) $write(", "); \
        $write("0x%0h", a[i]); \
    end \
    $write("}");
|
||||
|
||||
// Writes a two-level array `a` as "{{...}, {...}}" using $write, without a
// trailing newline. The outer index runs from n-1 down to 0 and, inside each
// group, the inner index runs from m-1 down to 0; elements are accessed as
// a[outer][inner] and printed as "0x<hex>".
// Expands to several statements, so invoke it inside a begin/end block and
// terminate the invocation with ';'.
`define PRINT_ARRAY2D(a, m, n) \
    $write("{"); \
    for (integer i = ((n)-1); i >= 0; --i) begin \
        if (i != ((n)-1)) $write(", "); \
        $write("{"); \
        for (integer j = ((m)-1); j >= 0; --j) begin \
            if (j != ((m)-1)) $write(", "); \
            $write("0x%0h", a[i][j]); \
        end \
        $write("}"); \
    end \
    $write("}")
|
||||
|
||||
`endif
|
|
@ -151,4 +151,15 @@ module VX_tex_addr #(
|
|||
|
||||
assign ready_in = ~stall_out;
|
||||
|
||||
`ifdef DBG_PRINT_TEX
    // Debug trace, compiled in only when DBG_PRINT_TEX is defined: on every
    // clock edge where mem_req_valid and mem_req_ready are both high, print
    // one line with the request fields followed by the address array.
    always @(posedge clk) begin
        if (mem_req_valid && mem_req_ready) begin
            // Label corrected from "tride=" to "stride=": the value printed is mem_req_stride.
            $write("%t: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
                    $time, CORE_ID, mem_req_wid, mem_req_PC, mem_req_tmask, mem_req_filter, mem_req_stride);
            `PRINT_ARRAY2D(mem_req_addr, 4, `NUM_THREADS);
            $write("\n");
        end
    end
`endif
|
||||
|
||||
endmodule
|
|
@ -41,7 +41,6 @@ module VX_tex_bilerp #(
|
|||
|
||||
`UNUSED_VAR (V_lerp[63:56])
|
||||
|
||||
|
||||
always @(*) begin
|
||||
if (color_enable[3]==1'b1) //R
|
||||
sampled_r[31:24] = V_lerp[55:48];
|
||||
|
|
|
@ -166,7 +166,8 @@ module VX_tex_memory #(
|
|||
// Dcache Response
|
||||
|
||||
reg [3:0][`NUM_THREADS-1:0][31:0] rsp_texels, rsp_texels_n;
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_cur_data;
|
||||
wire [`NUM_THREADS-1:0][3:0][31:0] rsp_texels_qual;
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_data_qual;
|
||||
reg [RSP_CTR_W-1:0] rsp_rem_ctr;
|
||||
wire [`NUM_THREADS-1:0] rsp_cur_tmask;
|
||||
wire [RSP_CTR_W-1:0] rsp_max_cnt;
|
||||
|
@ -187,8 +188,9 @@ module VX_tex_memory #(
|
|||
|
||||
assign rsp_max_cnt = $countones(q_req_tmask) * (q_req_filter ? 4 : 1);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] src_data = (i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] src_mask = {32{dcache_rsp_if.valid[i]}};
|
||||
wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask;
|
||||
|
||||
reg [31:0] rsp_data_shifted;
|
||||
always @(*) begin
|
||||
|
@ -199,16 +201,16 @@ module VX_tex_memory #(
|
|||
|
||||
always @(*) begin
|
||||
case (q_req_stride)
|
||||
0: rsp_cur_data[i] = 32'(rsp_data_shifted[7:0]);
|
||||
1: rsp_cur_data[i] = 32'(rsp_data_shifted[15:0]);
|
||||
default: rsp_cur_data[i] = rsp_data_shifted;
|
||||
0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
|
||||
1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
|
||||
default: rsp_data_qual[i] = rsp_data_shifted;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
rsp_texels_n = rsp_texels;
|
||||
rsp_texels_n[rsp_texel_idx] |= rsp_cur_data;
|
||||
rsp_texels_n[rsp_texel_idx] |= rsp_data_qual;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -231,6 +233,12 @@ module VX_tex_memory #(
|
|||
end
|
||||
end
|
||||
|
||||
// Re-index rsp_texels_n from [slot][thread] into rsp_texels_qual[thread][slot]
// (4 slots per thread), one continuous assignment per element.
for (genvar slot = 0; slot < 4; ++slot) begin
    for (genvar tid = 0; tid < `NUM_THREADS; ++tid) begin
        assign rsp_texels_qual[tid][slot] = rsp_texels_n[slot][tid];
    end
end
|
||||
|
||||
wire stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt));
|
||||
|
@ -244,8 +252,8 @@ module VX_tex_memory #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_n, q_req_info}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info})
|
||||
.data_in ({rsp_texels_done, q_req_wid, q_req_tmask, q_req_PC, q_req_filter, rsp_texels_qual, q_req_info}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_data, rsp_info})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
|
@ -254,12 +262,22 @@ module VX_tex_memory #(
|
|||
`ifdef DBG_PRINT_TEX
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_fire)) begin
|
||||
$display("%t: T$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, is_dup=%b",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_addr, dcache_req_if.tag, req_texel_dup);
|
||||
$write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag);
|
||||
`PRINT_ARRAY1D(req_texel_addr, `NUM_THREADS);
|
||||
$write(", is_dup=%b\n", req_texel_dup);
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
$display("%t: T$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, data=%0h, is_dup=%b",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_PC, dcache_rsp_if.tag, dcache_rsp_if.data, rsp_texel_dup);
|
||||
$write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag);
|
||||
`PRINT_ARRAY1D(rsp_data_qual, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (rsp_valid && rsp_ready) begin
|
||||
$write("%t: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, filter=%0d, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_PC, rsp_filter);
|
||||
`PRINT_ARRAY2D(rsp_data, 4, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -38,7 +38,7 @@ module VX_tex_sampler #(
|
|||
|
||||
wire stall_out;
|
||||
|
||||
for (genvar i = 0; i<`NUM_THREADS ;i++ ) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
wire [3:0][63:0] formatted_data;
|
||||
wire [`NUM_COLOR_CHANNEL-1:0] color_enable;
|
||||
|
@ -57,24 +57,23 @@ module VX_tex_sampler #(
|
|||
VX_tex_bilerp #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) tex_bilerp (
|
||||
.blendU(req_u[i][`BLEND_FRAC_64-1:0]), //blendU
|
||||
.blendV(req_v[i][`BLEND_FRAC_64-1:0]), //blendV
|
||||
.blendU (req_u[i][`BLEND_FRAC_64-1:0]),
|
||||
.blendV (req_v[i][`BLEND_FRAC_64-1:0]),
|
||||
|
||||
.color_enable(color_enable),
|
||||
.texels(formatted_data),
|
||||
|
||||
.sampled_data(req_data_bilerp[i])
|
||||
.color_enable (color_enable),
|
||||
.texels (formatted_data),
|
||||
|
||||
.sampled_data (req_data_bilerp[i])
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
for (genvar i = 0;i<`NUM_THREADS ;i++ ) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign req_data[i] = (req_filter == `TEX_FILTER_BITS'(0)) ? req_texels[i][0] : req_data_bilerp[i];
|
||||
end
|
||||
|
||||
assign stall_out = ~rsp_ready;
|
||||
assign req_ready = rsp_ready;
|
||||
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
|
@ -86,4 +85,7 @@ module VX_tex_sampler #(
|
|||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
assign req_ready = ~stall_out;
|
||||
|
||||
endmodule
|
|
@ -41,16 +41,28 @@ module VX_tex_unit #(
|
|||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_ADDR(i) : tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
`CSR_TEX_FORMAT(i) : tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
`CSR_TEX_ADDR(i) : begin
|
||||
tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_FORMAT(i) : begin
|
||||
tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
tex_wrap_u[i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
|
||||
tex_wrap_v[i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
|
||||
end
|
||||
`CSR_TEX_FILTER(i) : tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
`CSR_TEX_MIPOFF(i) : tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
`CSR_TEX_WIDTH(i) : tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
`CSR_TEX_HEIGHT(i) : tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
`CSR_TEX_FILTER(i) : begin
|
||||
tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_MIPOFF(i) : begin
|
||||
tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_WIDTH(i) : begin
|
||||
tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_HEIGHT(i) : begin
|
||||
tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
end
|
||||
default:
|
||||
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES));
|
||||
|
@ -212,22 +224,34 @@ module VX_tex_unit #(
|
|||
);
|
||||
|
||||
`ifdef DBG_PRINT_TEX
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable
|
||||
&& (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i)
|
||||
&& tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin
|
||||
$display("%t: core%0d-tex_unit: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
|
||||
$display("%t: core%0d-tex_unit: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wrap_u[i]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wrap_v[i]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_width[i][0]);
|
||||
$display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_height[i][0]);
|
||||
end
|
||||
end
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (tex_req_if.valid && tex_req_if.ready) begin
|
||||
$display("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, u=%0h, v=%0h, lod=%0h",
|
||||
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.u, tex_req_if.v, tex_req_if.lod);
|
||||
end
|
||||
if (tex_rsp_if.valid && tex_rsp_if.ready) begin
|
||||
$write("%t: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
|
||||
$time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
|
||||
`PRINT_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
Loading…
Add table
Add a link
Reference in a new issue