mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
texture unit critical path optimization
This commit is contained in:
parent
deed327890
commit
43ad188ccb
8 changed files with 100 additions and 63 deletions
|
@ -91,6 +91,8 @@
|
|||
|
||||
// Keeps the low-order s bits of x by part-selecting bits [s-1:0].
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
// Bit width used for the sum of an x-bit and a y-bit operand, computed as
// MAX(x, MIN(x, y) + 1).
// The macro expands to an expression, so it carries no trailing semicolon:
// the statement terminator belongs to the use site (e.g. a localparam).
// NOTE(review): a fully general adder needs MAX(x, y) + 1 bits; this form is
// narrower whenever the two widths differ -- confirm the operand ranges make
// that safe.
`define ADDER_CARRY_WIDTH(x, y) `MAX(x, `MIN(x, y)+1)
|
||||
|
||||
`define PRINT_ARRAY1D(a, m) \
|
||||
$write("{"); \
|
||||
for (integer i = (m-1); i >= 0; --i) begin \
|
||||
|
|
|
@ -36,6 +36,8 @@ module VX_tex_addr #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam PITCH_BITS = `ADDER_CARRY_WIDTH(`TEX_DIM_BITS, `TEX_STRIDE_BITS);
|
||||
|
||||
wire valid_s0;
|
||||
wire [NUM_REQS-1:0] tmask_s0;
|
||||
wire [`TEX_FILTER_BITS-1:0] filter_s0;
|
||||
|
@ -44,7 +46,8 @@ module VX_tex_addr #(
|
|||
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0;
|
||||
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
wire [NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_height, log_height_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
|
@ -61,8 +64,9 @@ module VX_tex_addr #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i];
|
||||
wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i];
|
||||
wire [`FIXED_FRAC-1:0] delta = (`FIXED_HALF >> req_logdims[i][j]);
|
||||
wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - 32'(delta)) : req_coords[j][i];
|
||||
wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + 32'(delta)) : req_coords[j][i];
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
|
@ -79,41 +83,45 @@ module VX_tex_addr #(
|
|||
.coord_i (coord_hi),
|
||||
.coord_o (clamped_hi[i][j])
|
||||
);
|
||||
end
|
||||
assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
|
||||
end
|
||||
assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride);
|
||||
assign log_height[i] = req_logdims[i][1];
|
||||
assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (PITCH_BITS + `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, req_logdims, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, log_height, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_height_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
);
|
||||
|
||||
// addresses generation
|
||||
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi;
|
||||
wire [NUM_REQS-1:0][(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scaled_u_lo, scaled_u_hi;
|
||||
wire [NUM_REQS-1:0][`FIXED_INT-1:0] scaled_v_lo, scaled_v_hi;
|
||||
wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends;
|
||||
wire [NUM_REQS-1:0][3:0][31:0] addr;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
assign scaled_lo[i][j] = `FIXED_INT'(clamped_lo_s0[i][j] >> ((`FIXED_FRAC) - log_dims_s0[i][j]));
|
||||
assign scaled_hi[i][j] = `FIXED_INT'(clamped_hi_s0[i][j] >> ((`FIXED_FRAC) - log_dims_s0[i][j]));
|
||||
assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
|
||||
assign scaled_u_lo[i] = scale_to_pitch(clamped_lo_s0[i][0], log_pitch_s0[i]);
|
||||
assign scaled_u_hi[i] = scale_to_pitch(clamped_hi_s0[i][0], log_pitch_s0[i]);
|
||||
assign scaled_v_lo[i] = scale_to_height(clamped_lo_s0[i][1], log_height_s0[i]);
|
||||
assign scaled_v_hi[i] = scale_to_height(clamped_hi_s0[i][1], log_height_s0[i]);
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign addr[i][0] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
|
||||
assign addr[i][1] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
|
||||
assign addr[i][2] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
|
||||
assign addr[i][3] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
|
||||
assign addr[i][0] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]);
|
||||
assign addr[i][1] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]);
|
||||
assign addr[i][2] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]);
|
||||
assign addr[i][3] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]);
|
||||
end
|
||||
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
@ -131,9 +139,10 @@ module VX_tex_addr #(
|
|||
|
||||
assign req_ready = ~stall_out;
|
||||
|
||||
`ifdef DBG_PRINT_TEX
|
||||
`ifdef DBG_PRINT_TEX
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [31:0] rsp_PC;
|
||||
|
||||
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -146,4 +155,22 @@ module VX_tex_addr #(
|
|||
end
|
||||
`endif
|
||||
|
||||
// Scales a FIXED_FRAC-bit fractional value by a power of two and drops the
// fractional part: src is zero-extended to (FIXED_BITS + TEX_STRIDE_BITS)
// bits, shifted left by dim, and the (FIXED_INT + TEX_STRIDE_BITS) bits that
// start at bit FIXED_FRAC are returned.
//   src : fractional input value
//   dim : left-shift amount (callers in this module pass the log2 pitch)
function logic [(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scale_to_pitch (
    input logic [`FIXED_FRAC-1:0] src,
    input logic [PITCH_BITS-1:0]  dim
);
`IGNORE_UNUSED_BEGIN
    // The low FIXED_FRAC bits of the shifted value are intentionally discarded.
    logic [(`FIXED_BITS+`TEX_STRIDE_BITS)-1:0] shifted;
`IGNORE_UNUSED_END
    // Explicit zero-extension to the full working width before shifting.
    shifted = {{(`FIXED_INT+`TEX_STRIDE_BITS){1'b0}}, src} << dim;
    scale_to_pitch = shifted[`FIXED_FRAC +: (`FIXED_INT+`TEX_STRIDE_BITS)];
endfunction
|
||||
|
||||
// Scales a FIXED_FRAC-bit fractional value by a power of two and drops the
// fractional part: src is zero-extended to FIXED_BITS bits, shifted left by
// dim, and the FIXED_INT bits that start at bit FIXED_FRAC are returned.
//   src : fractional input value
//   dim : left-shift amount (callers in this module pass the log2 height)
function logic [`FIXED_INT-1:0] scale_to_height (
    input logic [`FIXED_FRAC-1:0]   src,
    input logic [`TEX_DIM_BITS-1:0] dim
);
`IGNORE_UNUSED_BEGIN
    // The low FIXED_FRAC bits of the shifted value are intentionally discarded.
    logic [`FIXED_BITS-1:0] shifted;
`IGNORE_UNUSED_END
    // Explicit zero-extension to the full working width before shifting.
    shifted = {{`FIXED_INT{1'b0}}, src} << dim;
    scale_to_height = shifted[`FIXED_FRAC +: `FIXED_INT];
endfunction
|
||||
|
||||
endmodule
|
|
@ -3,11 +3,12 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Fixed-point layout used by the texture unit. Each macro is defined exactly
// once; the integer width is derived from the total width instead of a
// hard-coded 32.
`define FIXED_BITS  32                              // total word width
`define FIXED_FRAC  20                              // fractional bits
`define FIXED_INT   (`FIXED_BITS - `FIXED_FRAC)     // remaining (integer) bits
`define FIXED_ONE   (2 ** `FIXED_FRAC)              // 1.0 in this format
`define FIXED_HALF  (`FIXED_ONE >> 1)               // 0.5 in this format
`define FIXED_MASK  (`FIXED_ONE - 1)                // mask of the fractional bits
|
||||
|
||||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
|
|
|
@ -13,6 +13,12 @@ module VX_tex_format #(
|
|||
|
||||
always @(*) begin
|
||||
case (format)
|
||||
`TEX_FORMAT_R8G8B8A8: begin
|
||||
texel_out_r[07:00] = texel_in[7:0];
|
||||
texel_out_r[15:08] = texel_in[15:8];
|
||||
texel_out_r[23:16] = texel_in[23:16];
|
||||
texel_out_r[31:24] = texel_in[31:24];
|
||||
end
|
||||
`TEX_FORMAT_R5G6B5: begin
|
||||
texel_out_r[07:00] = {texel_in[15:11], texel_in[15:13]};
|
||||
texel_out_r[15:08] = {texel_in[10:5], texel_in[10:9]};
|
||||
|
@ -31,24 +37,18 @@ module VX_tex_format #(
|
|||
texel_out_r[23:16] = texel_in[7:0];
|
||||
texel_out_r[31:24] = texel_in[15:8];
|
||||
end
|
||||
`TEX_FORMAT_A8: begin
|
||||
texel_out_r[07:00] = 0;
|
||||
texel_out_r[15:08] = 0;
|
||||
texel_out_r[23:16] = 0;
|
||||
texel_out_r[31:24] = texel_in[7:0];
|
||||
end
|
||||
`TEX_FORMAT_L8: begin
|
||||
texel_out_r[07:00] = texel_in[7:0];
|
||||
texel_out_r[15:08] = texel_in[7:0];
|
||||
texel_out_r[23:16] = texel_in[7:0];
|
||||
texel_out_r[31:24] = 8'hff;
|
||||
end
|
||||
// `TEX_FORMAT_R8G8B8A8
|
||||
default: begin
|
||||
texel_out_r[07:00] = texel_in[7:0];
|
||||
texel_out_r[15:08] = texel_in[15:8];
|
||||
texel_out_r[23:16] = texel_in[23:16];
|
||||
texel_out_r[31:24] = texel_in[31:24];
|
||||
//`TEX_FORMAT_A8
|
||||
default: begin
|
||||
texel_out_r[07:00] = 0;
|
||||
texel_out_r[15:08] = 0;
|
||||
texel_out_r[23:16] = 0;
|
||||
texel_out_r[31:24] = texel_in[7:0];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -1,17 +1,16 @@
|
|||
`include "VX_tex_define.vh"
|
||||
|
||||
// Per-lane weighted blend of two packed texels.
//
// Each texel is four 8-bit lanes. For every lane i:
//     sum    = in1[i] * alpha + in2[i] * beta   (17-bit)
//     out[i] = sum[15:8]
// The weights are supplied by the instantiating module rather than derived
// here, so this module contains only the two multiplies and the add.
//
// Ports:
//   in1, in2 : packed 4x8-bit input texels
//   alpha    : 9-bit weight applied to in1
//   beta     : 8-bit weight applied to in2
//   out      : packed 4x8-bit blended texel
module VX_tex_lerp (
    input wire [3:0][7:0]  in1,
    input wire [3:0][7:0]  in2,
    input wire [8:0]       alpha,
    input wire [7:0]       beta,
    output wire [3:0][7:0] out
);
    for (genvar i = 0; i < 4; ++i) begin
        wire [16:0] sum = in1[i] * alpha + in2[i] * beta;
        // Only bits [15:8] of the sum are consumed; silence unused-bit lint.
        `UNUSED_VAR (sum)
        assign out[i] = sum[15:8];
    end

endmodule
|
|
@ -51,20 +51,25 @@ module VX_tex_sampler #(
|
|||
);
|
||||
end
|
||||
|
||||
wire [7:0] beta = req_blends[i][0];
|
||||
wire [8:0] alpha = `BLEND_ONE - beta;
|
||||
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_ul (
|
||||
.blend (req_blends[i][0]),
|
||||
.in1 (fmt_texels[0]),
|
||||
.in2 (fmt_texels[1]),
|
||||
.out (texel_ul[i])
|
||||
.in1 (fmt_texels[0]),
|
||||
.in2 (fmt_texels[1]),
|
||||
.alpha (alpha),
|
||||
.beta (beta),
|
||||
.out (texel_ul[i])
|
||||
);
|
||||
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_uh (
|
||||
.blend (req_blends[i][0]),
|
||||
.in1 (fmt_texels[2]),
|
||||
.in2 (fmt_texels[3]),
|
||||
.out (texel_uh[i])
|
||||
.in1 (fmt_texels[2]),
|
||||
.in2 (fmt_texels[3]),
|
||||
.alpha (alpha),
|
||||
.beta (beta),
|
||||
.out (texel_uh[i])
|
||||
);
|
||||
|
||||
assign blend_v[i] = req_blends[i][1];
|
||||
|
@ -82,12 +87,16 @@ module VX_tex_sampler #(
|
|||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
wire [7:0] beta = blend_v_s0[i];
|
||||
wire [8:0] alpha = `BLEND_ONE - beta;
|
||||
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_v (
|
||||
.blend (blend_v_s0[i]),
|
||||
.in1 (texel_ul_s0[i]),
|
||||
.in2 (texel_uh_s0[i]),
|
||||
.out (texel_v[i])
|
||||
.in1 (texel_ul_s0[i]),
|
||||
.in2 (texel_uh_s0[i]),
|
||||
.alpha (alpha),
|
||||
.beta (beta),
|
||||
.out (texel_v[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -108,7 +117,6 @@ module VX_tex_sampler #(
|
|||
assign req_ready = ~stall_out;
|
||||
|
||||
`ifdef DBG_PRINT_TEX
|
||||
|
||||
wire [`NW_BITS-1:0] req_wid, rsp_wid;
|
||||
wire [31:0] req_PC, rsp_PC;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
module VX_sat_fx #(
|
||||
module VX_tex_sat #(
|
||||
parameter IN_W = 1,
|
||||
parameter OUT_W = 1,
|
||||
parameter MODEL = 1
|
||||
|
@ -11,11 +11,11 @@ module VX_sat_fx #(
|
|||
`STATIC_ASSERT(((OUT_W+1) < IN_W), ("invalid parameter"))
|
||||
|
||||
if (MODEL == 1) begin
|
||||
assign data_out = data_in[IN_W-1] ? OUT_W'(0) : ((data_in > {OUT_W{1'b1}}) ? {OUT_W{1'b1}} : OUT_W'(data_in));
|
||||
end else begin
|
||||
wire [OUT_W-1:0] underflow_mask = {OUT_W{~data_in[IN_W-1]}};
|
||||
wire [OUT_W-1:0] overflow_mask = {OUT_W{(| data_in[IN_W-2:OUT_W])}};
|
||||
assign data_out = (data_in[OUT_W-1:0] | overflow_mask) & underflow_mask;
|
||||
assign data_out = (data_in[OUT_W-1:0] | overflow_mask) & underflow_mask;
|
||||
end else begin
|
||||
assign data_out = data_in[IN_W-1] ? OUT_W'(0) : ((data_in > {OUT_W{1'b1}}) ? {OUT_W{1'b1}} : OUT_W'(data_in));
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -14,7 +14,7 @@ module VX_tex_wrap #(
|
|||
|
||||
wire [`FIXED_FRAC-1:0] clamp;
|
||||
|
||||
VX_sat_fx #(
|
||||
VX_tex_sat #(
|
||||
.IN_W (32),
|
||||
.OUT_W (`FIXED_FRAC)
|
||||
) sat_fx (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue