mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-22 21:09:15 -04:00
increasing 8-bit lerp accuracy in h/w
This commit is contained in:
parent
d4e5367c18
commit
df95544a1c
9 changed files with 108 additions and 54 deletions
|
@ -124,7 +124,8 @@ echo "begin graphics tests..."
|
|||
|
||||
CONFIGS="-DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=simx --app=imadd
|
||||
CONFIGS="-DEXT_IMADD_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=imadd --args="-n32 -z"
|
||||
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DNUM_RASTER_UNITS=2" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png"
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_GFX_ENABLE -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE -DRCACHE_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=draw3d --args="-tbox.cgltrace -rbox_ref_128.png" --clusters=2 --cores=2 --warps=1 --threads=2
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-ttriangle.cgltrace -rtriangle_ref_8.png -w8 -h8" --warps=1 --threads=2 --debug=3
|
||||
|
|
|
@ -11,12 +11,9 @@ module VX_lerp_fx #(
|
|||
output wire [N-1:0] out
|
||||
);
|
||||
|
||||
wire [F:0] one = {1'b1, {F{1'b0}}};
|
||||
wire [F+1:0] sub = one - (F+1)'(frac);
|
||||
wire [(N+F):0] prod = in1 * sub + in2 * frac;
|
||||
`UNUSED_VAR (prod)
|
||||
|
||||
assign out = prod [F +: N];
|
||||
wire [F-1:0] sub = F'(-1) - frac;
|
||||
wire [N+F:0] tmp = in1 * sub + in2 * frac;
|
||||
assign out = N'((tmp + (tmp >> F)) >> F);
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_rop_blend #(
|
|||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam LATENCY = `LATENCY_IMUL + 1;
|
||||
localparam LATENCY = `LATENCY_IMUL + 2;
|
||||
|
||||
`UNUSED_VAR (dcrs)
|
||||
|
||||
|
@ -92,7 +92,8 @@ module VX_rop_blend #(
|
|||
.LATENCY (LATENCY)
|
||||
) rop_blend_multadd (
|
||||
.clk (clk),
|
||||
.enable (~stall),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.mode_rgb (dcrs.blend_mode_rgb),
|
||||
.mode_a (dcrs.blend_mode_a),
|
||||
.src_color (src_color_s1[i]),
|
||||
|
@ -106,6 +107,7 @@ module VX_rop_blend #(
|
|||
.LATENCY (LATENCY)
|
||||
) rop_blend_minmax (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.src_color (src_color_s1[i]),
|
||||
.dst_color (dst_color_s1[i]),
|
||||
|
@ -117,6 +119,7 @@ module VX_rop_blend #(
|
|||
.LATENCY (LATENCY)
|
||||
) rop_logic_op (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.op (dcrs.logic_op),
|
||||
.src_color (src_color_s1[i]),
|
||||
|
|
|
@ -3,8 +3,10 @@
|
|||
module VX_rop_blend_minmax #(
|
||||
parameter LATENCY = 1
|
||||
) (
|
||||
input clk,
|
||||
input enable,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire enable,
|
||||
|
||||
input rgba_t src_color,
|
||||
input rgba_t dst_color,
|
||||
|
@ -13,6 +15,8 @@ module VX_rop_blend_minmax #(
|
|||
output rgba_t max_out
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
rgba_t tmp_min;
|
||||
rgba_t tmp_max;
|
||||
|
||||
|
|
|
@ -15,10 +15,12 @@
|
|||
)
|
||||
|
||||
module VX_rop_blend_multadd #(
|
||||
parameter LATENCY = (`LATENCY_IMUL + 1)
|
||||
parameter LATENCY = 1
|
||||
) (
|
||||
input clk,
|
||||
input enable,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire enable,
|
||||
|
||||
input wire [`ROP_BLEND_MODE_BITS-1:0] mode_rgb,
|
||||
input wire [`ROP_BLEND_MODE_BITS-1:0] mode_a,
|
||||
|
@ -31,42 +33,46 @@ module VX_rop_blend_multadd #(
|
|||
|
||||
output rgba_t color_out
|
||||
);
|
||||
`STATIC_ASSERT((LATENCY >= `LATENCY_IMUL), ("invalid parameter"))
|
||||
|
||||
`STATIC_ASSERT((LATENCY > `LATENCY_IMUL), ("invalid parameter"))
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam LATENCY_REM = LATENCY - `LATENCY_IMUL;
|
||||
|
||||
wire [15:0] prod_src_r, prod_src_g, prod_src_b, prod_src_a;
|
||||
wire [15:0] prod_dst_r, prod_dst_g, prod_dst_b, prod_dst_a;
|
||||
|
||||
// src_color x src_factor
|
||||
`MULT8(clk, enable, prod_src_r, src_color.r, src_factor.r);
|
||||
`MULT8(clk, enable, prod_src_g, src_color.g, src_factor.g);
|
||||
`MULT8(clk, enable, prod_src_b, src_color.b, src_factor.b);
|
||||
`MULT8(clk, enable, prod_src_a, src_color.a, src_factor.a);
|
||||
|
||||
// dst_color x dst_factor
|
||||
`MULT8(clk, enable, prod_dst_r, dst_color.r, dst_factor.r);
|
||||
`MULT8(clk, enable, prod_dst_g, dst_color.g, dst_factor.g);
|
||||
`MULT8(clk, enable, prod_dst_b, dst_color.b, dst_factor.b);
|
||||
`MULT8(clk, enable, prod_dst_a, dst_color.a, dst_factor.a);
|
||||
|
||||
reg [15:0] sum_r, sum_g, sum_b, sum_a;
|
||||
reg [16:0] sum_r, sum_g, sum_b, sum_a;
|
||||
|
||||
// apply blend mode
|
||||
always @(*) begin
|
||||
// RGB blending
|
||||
case (mode_rgb)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_r = prod_src_r + prod_dst_r + 16'hff;
|
||||
sum_g = prod_src_g + prod_dst_g + 16'hff;
|
||||
sum_b = prod_src_b + prod_dst_b + 16'hff;
|
||||
sum_r = prod_src_r + prod_dst_r;
|
||||
sum_g = prod_src_g + prod_dst_g;
|
||||
sum_b = prod_src_b + prod_dst_b;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_r = prod_src_r - prod_dst_r + 16'hff;
|
||||
sum_g = prod_src_g - prod_dst_g + 16'hff;
|
||||
sum_b = prod_src_b - prod_dst_b + 16'hff;
|
||||
sum_r = prod_src_r - prod_dst_r;
|
||||
sum_g = prod_src_g - prod_dst_g;
|
||||
sum_b = prod_src_b - prod_dst_b;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_r = prod_dst_r - prod_src_r + 16'hff;
|
||||
sum_g = prod_dst_g - prod_src_g + 16'hff;
|
||||
sum_b = prod_dst_b - prod_src_b + 16'hff;
|
||||
sum_r = prod_dst_r - prod_src_r;
|
||||
sum_g = prod_dst_g - prod_src_g;
|
||||
sum_b = prod_dst_b - prod_src_b;
|
||||
end
|
||||
default: begin
|
||||
sum_r = 'x;
|
||||
|
@ -74,16 +80,15 @@ module VX_rop_blend_multadd #(
|
|||
sum_b = 'x;
|
||||
end
|
||||
endcase
|
||||
// Alpha blending
|
||||
case (mode_a)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_a = prod_src_a + prod_dst_a + 16'hff;
|
||||
sum_a = prod_src_a + prod_dst_a;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_a = prod_src_a - prod_dst_a + 16'hff;
|
||||
sum_a = prod_src_a - prod_dst_a;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_a = prod_dst_a - prod_src_a + 16'hff;
|
||||
sum_a = prod_dst_a - prod_src_a;
|
||||
end
|
||||
default: begin
|
||||
sum_a = 'x;
|
||||
|
@ -91,10 +96,49 @@ module VX_rop_blend_multadd #(
|
|||
endcase
|
||||
end
|
||||
|
||||
`UNUSED_VAR (sum_r)
|
||||
`UNUSED_VAR (sum_g)
|
||||
`UNUSED_VAR (sum_b)
|
||||
`UNUSED_VAR (sum_a)
|
||||
reg [15:0] clamp_r, clamp_g, clamp_b, clamp_a;
|
||||
|
||||
// clamp to (0, 255 * 255)
|
||||
always @(*) begin
|
||||
case (mode_rgb)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
clamp_r = (sum_r > 17'hFE01) ? 16'hFE01 : sum_r[15:0];
|
||||
clamp_g = (sum_g > 17'hFE01) ? 16'hFE01 : sum_g[15:0];
|
||||
clamp_b = (sum_b > 17'hFE01) ? 16'hFE01 : sum_b[15:0];
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB,
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
clamp_r = sum_r[16] ? 16'h0 : sum_r[15:0];
|
||||
clamp_g = sum_g[16] ? 16'h0 : sum_g[15:0];
|
||||
clamp_b = sum_b[16] ? 16'h0 : sum_b[15:0];
|
||||
end
|
||||
default: begin
|
||||
clamp_r = 'x;
|
||||
clamp_g = 'x;
|
||||
clamp_b = 'x;
|
||||
end
|
||||
endcase
|
||||
case (mode_a)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
clamp_a = (sum_a > 17'hFE01) ? 16'hFE01 : sum_a[15:0];
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB,
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
clamp_a = sum_a[16] ? 16'h0 : sum_a[15:0];
|
||||
end
|
||||
default: begin
|
||||
clamp_a = 'x;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
rgba_t result;
|
||||
|
||||
// divide by 255
|
||||
assign result.r = 8'((clamp_r + (clamp_r >> 8)) >> 8);
|
||||
assign result.g = 8'((clamp_g + (clamp_g >> 8)) >> 8);
|
||||
assign result.b = 8'((clamp_b + (clamp_b >> 8)) >> 8);
|
||||
assign result.a = 8'((clamp_a + (clamp_a >> 8)) >> 8);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (32),
|
||||
|
@ -103,8 +147,8 @@ module VX_rop_blend_multadd #(
|
|||
.clk (clk),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (enable),
|
||||
.data_in ({sum_a[15:8], sum_r[15:8], sum_g[15:8], sum_b[15:8]}),
|
||||
.data_out ({color_out.a, color_out.r, color_out.g, color_out.b})
|
||||
.data_in (result),
|
||||
.data_out (color_out)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -3,8 +3,10 @@
|
|||
module VX_rop_logic_op #(
|
||||
parameter LATENCY = 1
|
||||
) (
|
||||
input clk,
|
||||
input enable,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire enable,
|
||||
|
||||
input wire [`ROP_LOGIC_OP_BITS-1:0] op,
|
||||
input wire [31:0] src_color,
|
||||
|
@ -13,6 +15,8 @@ module VX_rop_logic_op #(
|
|||
output wire [31:0] color_out
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [31:0] tmp_color;
|
||||
|
||||
always @(*) begin
|
||||
|
|
|
@ -52,6 +52,7 @@ module VX_rop_mem #(
|
|||
wire [NUM_REQS-1:0][3:0] mreq_byteen, mreq_byteen_r;
|
||||
wire [TAG_WIDTH-1:0] mreq_tag, mreq_tag_r;
|
||||
wire mreq_ready_r;
|
||||
wire mreq_stall;
|
||||
|
||||
wire mrsp_valid;
|
||||
wire [NUM_REQS-1:0] mrsp_mask;
|
||||
|
@ -68,9 +69,9 @@ module VX_rop_mem #(
|
|||
assign stencil_byteen[i] = (dcrs.stencil_writemask[req_face[i]] != 0);
|
||||
end
|
||||
|
||||
wire mul_ready_in;
|
||||
wire mul_enable;
|
||||
|
||||
// DS submission
|
||||
// depth/stencil values submission
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [31:0] m_y_pitch, baddr_s;
|
||||
|
||||
|
@ -81,7 +82,7 @@ module VX_rop_mem #(
|
|||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (mul_ready_in),
|
||||
.enable (mul_enable),
|
||||
.dataa (req_pos_y[i]),
|
||||
.datab (dcrs.zbuf_pitch),
|
||||
.result (m_y_pitch)
|
||||
|
@ -98,7 +99,7 @@ module VX_rop_mem #(
|
|||
) shift_reg (
|
||||
.clk (clk),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (mul_ready_in),
|
||||
.enable (mul_enable),
|
||||
.data_in ({mask, byteen, baddr, data}),
|
||||
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
|
||||
);
|
||||
|
@ -109,7 +110,7 @@ module VX_rop_mem #(
|
|||
`UNUSED_VAR (addr)
|
||||
end
|
||||
|
||||
// Bland submission
|
||||
// blend color submission
|
||||
for (genvar i = NUM_LANES; i < NUM_REQS; ++i) begin
|
||||
wire [31:0] m_y_pitch, baddr_s;
|
||||
|
||||
|
@ -120,7 +121,7 @@ module VX_rop_mem #(
|
|||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (mul_ready_in),
|
||||
.enable (mul_enable),
|
||||
.dataa (req_pos_y[i - NUM_LANES]),
|
||||
.datab (dcrs.cbuf_pitch),
|
||||
.result (m_y_pitch)
|
||||
|
@ -137,7 +138,7 @@ module VX_rop_mem #(
|
|||
) shift_reg (
|
||||
.clk (clk),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (mul_ready_in),
|
||||
.enable (mul_enable),
|
||||
.data_in ({mask, byteen, baddr, data}),
|
||||
.data_out ({mreq_mask[i], mreq_byteen[i], baddr_s, mreq_data[i]})
|
||||
);
|
||||
|
@ -145,7 +146,7 @@ module VX_rop_mem #(
|
|||
wire [31:0] addr = baddr_s + m_y_pitch;
|
||||
|
||||
assign mreq_addr[i] = addr[(32-OCACHE_ADDR_WIDTH) +: OCACHE_ADDR_WIDTH];
|
||||
`UNUSED_VAR (addr)
|
||||
`UNUSED_VAR (addr)
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
@ -155,16 +156,14 @@ module VX_rop_mem #(
|
|||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.enable (mul_enable),
|
||||
.data_in ({req_valid, req_rw, req_tag}),
|
||||
.data_out ({mreq_valid, mreq_rw, mreq_tag})
|
||||
);
|
||||
|
||||
wire mreq_stall = mreq_valid_r && ~mreq_ready_r;
|
||||
assign req_ready = mul_enable;
|
||||
|
||||
assign mul_ready_in = ~(mreq_valid && mreq_stall);
|
||||
|
||||
assign req_ready = mul_ready_in;
|
||||
assign mul_enable = ~(mreq_valid && mreq_stall);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + NUM_REQS * (1 + 4 + OCACHE_ADDR_WIDTH + 32) + TAG_WIDTH),
|
||||
|
@ -177,6 +176,8 @@ module VX_rop_mem #(
|
|||
.data_out ({mreq_valid_r, mreq_rw_r, mreq_mask_r, mreq_byteen_r, mreq_addr_r, mreq_data_r, mreq_tag_r})
|
||||
);
|
||||
|
||||
assign mreq_stall = mreq_valid_r && ~mreq_ready_r;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
VX_mem_scheduler #(
|
||||
|
|
|
@ -40,7 +40,7 @@ bool sw_interp = false;
|
|||
uint32_t start_draw = 0;
|
||||
uint32_t end_draw = -1;
|
||||
|
||||
uint32_t clear_color = 0x00000000;
|
||||
uint32_t clear_color = 0xff000000;
|
||||
uint32_t clear_depth = 0xffffffff;
|
||||
|
||||
uint32_t dst_width = 128;
|
||||
|
@ -542,7 +542,7 @@ int main(int argc, char *argv[]) {
|
|||
cleanup();
|
||||
|
||||
if (reference_file) {
|
||||
auto errors = CompareImages(output_file, reference_file, FORMAT_A8R8G8B8, 0);
|
||||
auto errors = CompareImages(output_file, reference_file, FORMAT_A8R8G8B8, 2);
|
||||
if (0 == errors) {
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
} else {
|
||||
|
|
|
@ -33,7 +33,7 @@ const char* trace_file = "triangle.cgltrace";
|
|||
const char* output_file = "output.png";
|
||||
const char* reference_file = nullptr;
|
||||
|
||||
uint32_t clear_color = 0x00000000;
|
||||
uint32_t clear_color = 0xff000000;
|
||||
|
||||
uint32_t dst_width = 128;
|
||||
uint32_t dst_height = 128;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue