mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
timing fixes
This commit is contained in:
parent
03a1a4b9f1
commit
4683def6dd
6 changed files with 99 additions and 76 deletions
|
@ -27,8 +27,8 @@ module VX_tex_addr #(
|
|||
input wire [`TEX_ADDR_BITS-1:0] base_addr,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] mip_offsets,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] log_widths,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] log_heights,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths,
|
||||
input wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] coord_u,
|
||||
input wire [`NUM_THREADS-1:0][31:0] coord_v,
|
||||
|
@ -50,9 +50,20 @@ module VX_tex_addr #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u;
|
||||
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_v;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride;
|
||||
wire [`NUM_THREADS-1:0][1:0][`FIXED_FRAC-1:0] clamped_u, clamped_v, clamped_u_s0, clamped_v_s0;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [`NUM_THREADS-1:0][31:0] mip_addr, mip_addr_s0;
|
||||
|
||||
wire valid_in_s0;
|
||||
wire [`NW_BITS-1:0] req_wid_s0;
|
||||
wire [`NUM_THREADS-1:0] req_tmask_s0;
|
||||
wire [31:0] req_PC_s0;
|
||||
wire [REQ_INFO_WIDTH-1:0] req_info_s0;
|
||||
wire [`TEX_FILTER_BITS-1:0] filter_s0;
|
||||
wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_widths_s0;
|
||||
wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] log_heights_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
// stride
|
||||
|
||||
|
@ -66,9 +77,7 @@ module VX_tex_addr #(
|
|||
// addressing mode
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
wire [31:0] fu[1:0];
|
||||
wire [31:0] fv[1:0];
|
||||
wire [1:0][31:0] fu, fv;
|
||||
|
||||
assign fu[0] = coord_u[i] - (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
assign fu[1] = coord_u[i] + (filter ? (`FIXED_HALF >> log_widths[i]) : 0);
|
||||
|
@ -107,47 +116,56 @@ module VX_tex_addr #(
|
|||
.coord_i (fv[1]),
|
||||
.coord_o (clamped_v[i][1])
|
||||
);
|
||||
|
||||
assign mip_addr[i] = base_addr + 32'(mip_offsets[i]);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + `NUM_THREADS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, req_info, log_widths, log_heights, mip_addr, clamped_u, clamped_v}),
|
||||
.data_out ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, req_info_s0, log_widths_s0, log_heights_s0, mip_addr_s0, clamped_u_s0, clamped_v_s0})
|
||||
);
|
||||
|
||||
// addresses generation
|
||||
|
||||
wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v;
|
||||
wire [`NUM_THREADS-1:0][3:0][31:0] addr;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [1:0][`FIXED_INT-1:0] x, y;
|
||||
|
||||
wire [`FIXED_FRAC-1:0] x [1:0];
|
||||
wire [`FIXED_FRAC-1:0] y [1:0];
|
||||
assign x[0] = `FIXED_INT'(clamped_u_s0[i][0] >> ((`FIXED_FRAC) - log_widths_s0[i]));
|
||||
assign x[1] = `FIXED_INT'(clamped_u_s0[i][1] >> ((`FIXED_FRAC) - log_widths_s0[i]));
|
||||
assign y[0] = `FIXED_INT'(clamped_v_s0[i][0] >> ((`FIXED_FRAC) - log_heights_s0[i]));
|
||||
assign y[1] = `FIXED_INT'(clamped_v_s0[i][1] >> ((`FIXED_FRAC) - log_heights_s0[i]));
|
||||
|
||||
assign x[0] = clamped_u[i][0] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
assign x[1] = clamped_u[i][1] >> ((`FIXED_FRAC) - log_widths[i]);
|
||||
|
||||
assign y[0] = clamped_v[i][0] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
assign y[1] = clamped_v[i][1] >> ((`FIXED_FRAC) - log_heights[i]);
|
||||
|
||||
assign addr[i][0] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][1] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[0]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][2] = base_addr + 32'(mip_offsets[i]) + (32'(x[0]) + (32'(y[1]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][3] = base_addr + 32'(mip_offsets[i]) + (32'(x[1]) + (32'(y[1]) << log_widths[i])) << log_stride;
|
||||
assign addr[i][0] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0;
|
||||
assign addr[i][1] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[0]) << log_widths_s0[i])) << log_stride_s0;
|
||||
assign addr[i][2] = mip_addr_s0[i] + (32'(x[0]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0;
|
||||
assign addr[i][3] = mip_addr_s0[i] + (32'(x[1]) + (32'(y[1]) << log_widths_s0[i])) << log_stride_s0;
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_u, blend_v;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign blend_u[i] = clamped_u[i][0][`BLEND_FRAC-1:0];
|
||||
assign blend_v[i] = clamped_v[i][0][`BLEND_FRAC-1:0];
|
||||
assign blend_u[i] = clamped_u_s0[i][0][`BLEND_FRAC-1:0];
|
||||
assign blend_v[i] = clamped_v_s0[i][0][`BLEND_FRAC-1:0];
|
||||
end
|
||||
|
||||
wire stall_out = rsp_valid && ~rsp_ready;
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (`NUM_THREADS * 4 * 32) + (2*`NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({valid_in, req_wid, req_tmask, req_PC, filter, log_stride, addr, blend_u, blend_v, req_info}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, rsp_blend_u, rsp_blend_v, rsp_info})
|
||||
.data_in ({valid_in_s0, req_wid_s0, req_tmask_s0, req_PC_s0, filter_s0, log_stride_s0, addr, blend_u, blend_v, req_info_s0}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_filter, rsp_stride, rsp_addr, rsp_blend_u, rsp_blend_v, rsp_info})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall_out;
|
||||
|
|
|
@ -14,8 +14,7 @@
|
|||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_WIDTH_BITS 4
|
||||
`define TEX_HEIGHT_BITS 4
|
||||
`define TEX_DIM_BITS 4
|
||||
`define TEX_FILTER_BITS 1
|
||||
|
||||
`define TEX_MIPOFF_BITS (2*12+1)
|
||||
|
|
|
@ -7,29 +7,11 @@ module VX_tex_lerp #(
|
|||
input wire [31:0] in2,
|
||||
output wire [31:0] out
|
||||
);
|
||||
wire [63:0] in1_w, in2_w;
|
||||
wire [63:0] lerp1, lerp2;
|
||||
|
||||
`UNUSED_VAR (lerp1)
|
||||
`UNUSED_VAR (lerp2)
|
||||
|
||||
assign in1_w[15:00] = {8'h00, in1[07:00]};
|
||||
assign in1_w[31:16] = {8'h00, in1[15:08]};
|
||||
assign in1_w[47:32] = {8'h00, in1[23:16]};
|
||||
assign in1_w[63:48] = {8'h00, in1[31:24]};
|
||||
|
||||
assign in2_w[15:00] = {8'h00, in2[07:00]};
|
||||
assign in2_w[31:16] = {8'h00, in2[15:08]};
|
||||
assign in2_w[47:32] = {8'h00, in2[23:16]};
|
||||
assign in2_w[63:48] = {8'h00, in2[31:24]};
|
||||
|
||||
assign lerp1 = (in2_w - in1_w) * blend;
|
||||
|
||||
assign lerp2 = in1_w + {8'h00,lerp1[63:56], 8'h00,lerp1[47:40], 8'h00,lerp1[31:24], 8'h00,lerp1[15:8]};
|
||||
|
||||
assign out[07:00] = lerp2[07:00];
|
||||
assign out[15:08] = lerp2[23:16];
|
||||
assign out[23:16] = lerp2[39:32];
|
||||
assign out[31:24] = lerp2[55:48];
|
||||
for (genvar i = 0; i < 4; ++i) begin
|
||||
wire [8:0] m1 = (8'hff - blend);
|
||||
wire [16:0] sum = in1[i*8+:8] * blend + in2[i*8+:8] * m1;
|
||||
`UNUSED_VAR (sum)
|
||||
assign out[i*8+:8] = sum[15:8];
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -32,15 +32,25 @@ module VX_tex_sampler #(
|
|||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] texel_ul, texel_uh;
|
||||
wire [`NUM_THREADS-1:0][31:0] texel_ul_s0, texel_uh_s0;
|
||||
wire [`NUM_THREADS-1:0][`BLEND_FRAC-1:0] blend_v_qual, blend_v_s0;
|
||||
wire [`NUM_THREADS-1:0][31:0] texel_v;
|
||||
|
||||
wire req_valid_s0;
|
||||
wire [`NW_BITS-1:0] req_wid_s0;
|
||||
wire [`NUM_THREADS-1:0] req_tmask_s0;
|
||||
wire [31:0] req_PC_s0;
|
||||
wire [`NR_BITS-1:0] req_rd_s0;
|
||||
wire req_wb_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
wire [3:0][31:0] fmt_texels;
|
||||
wire [31:0] texel_ul, texel_uh, texel_v;
|
||||
wire [3:0][31:0] fmt_texels;
|
||||
wire [31:0] texel_ul_unqual;
|
||||
|
||||
for (genvar j = 0; j < 4; j++) begin
|
||||
VX_tex_format #(
|
||||
|
@ -57,7 +67,7 @@ module VX_tex_sampler #(
|
|||
.blend (req_blend_u[i]),
|
||||
.in1 (fmt_texels[0]),
|
||||
.in2 (fmt_texels[1]),
|
||||
.out (texel_ul)
|
||||
.out (texel_ul_unqual)
|
||||
);
|
||||
|
||||
VX_tex_lerp #(
|
||||
|
@ -65,18 +75,32 @@ module VX_tex_sampler #(
|
|||
.blend (req_blend_u[i]),
|
||||
.in1 (fmt_texels[2]),
|
||||
.in2 (fmt_texels[3]),
|
||||
.out (texel_uh)
|
||||
.out (texel_uh[i])
|
||||
);
|
||||
|
||||
assign blend_v_qual[i] = req_filter ? `BLEND_FRAC'(0) : req_blend_v[i];
|
||||
assign texel_ul[i] = req_filter ? fmt_texels[0] : texel_ul_unqual;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `BLEND_FRAC) + (2 * `NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, blend_v_qual, texel_ul, texel_uh}),
|
||||
.data_out ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, blend_v_s0, texel_ul_s0, texel_uh_s0})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_v (
|
||||
.blend (req_blend_v[i]),
|
||||
.in1 (texel_ul),
|
||||
.in2 (texel_uh),
|
||||
.out (texel_v)
|
||||
.blend (blend_v_s0[i]),
|
||||
.in1 (texel_ul_s0[i]),
|
||||
.in2 (texel_uh_s0[i]),
|
||||
.out (texel_v[i])
|
||||
);
|
||||
|
||||
assign result[i] = req_filter ? texel_v : fmt_texels[0];
|
||||
end
|
||||
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
@ -84,12 +108,12 @@ module VX_tex_sampler #(
|
|||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valid, req_wid, req_tmask, req_PC, req_rd, req_wb, result}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data})
|
||||
.data_in ({req_valid_s0, req_wid_s0, req_tmask_s0, req_PC_s0, req_rd_s0, req_wb_s0, texel_v}),
|
||||
.data_out ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
|
|
|
@ -25,8 +25,8 @@ module VX_tex_unit #(
|
|||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_WIDTH_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_HEIGHT_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_DIM_BITS-1:0] tex_width [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_DIM_BITS-1:0] tex_height [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
|
||||
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
|
||||
|
@ -58,10 +58,10 @@ module VX_tex_unit #(
|
|||
tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_WIDTH(i) : begin
|
||||
tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_WIDTH_BITS-1:0];
|
||||
tex_width[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_HEIGHT(i) : begin
|
||||
tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_HEIGHT_BITS-1:0];
|
||||
tex_height[i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
|
||||
end
|
||||
default:
|
||||
assert(tex_csr_if.write_addr >= `CSR_TEX_BEGIN(0)
|
||||
|
@ -74,8 +74,8 @@ module VX_tex_unit #(
|
|||
// mipmap attributes
|
||||
|
||||
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] tex_mipoffs;
|
||||
wire [`NUM_THREADS-1:0][`TEX_WIDTH_BITS-1:0] tex_widths;
|
||||
wire [`NUM_THREADS-1:0][`TEX_HEIGHT_BITS-1:0] tex_heights;
|
||||
wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_widths;
|
||||
wire [`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] tex_heights;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
|
||||
|
|
|
@ -45,7 +45,7 @@ set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
|||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue