mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-22 21:09:15 -04:00
minor update
This commit is contained in:
parent
786face3cd
commit
87f7bf1f86
8 changed files with 84 additions and 102 deletions
|
@ -34,8 +34,7 @@ module VX_generic_buffer #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1),
|
||||
.DEPTH (1)
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
`TRACING_OFF
|
||||
module VX_pipe_register #(
|
||||
parameter DATAW = 1,
|
||||
parameter RESETW = DATAW,
|
||||
parameter RESETW = 0,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -64,8 +64,7 @@ module VX_pipe_register #(
|
|||
for (genvar i = 1; i <= DEPTH; ++i) begin
|
||||
VX_pipe_register #(
|
||||
.DATAW (DATAW),
|
||||
.RESETW (RESETW),
|
||||
.DEPTH (1)
|
||||
.RESETW (RESETW)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -13,25 +13,34 @@ module VX_shift_register #(
|
|||
input wire [DATAW-1:0] data_in,
|
||||
output wire [NTAPS-1:0][DATAW-1:0] data_out
|
||||
);
|
||||
localparam TOTAL_DEPTH = NTAPS * DEPTH;
|
||||
if (DEPTH != 0) begin
|
||||
localparam TOTAL_DEPTH = NTAPS * DEPTH;
|
||||
|
||||
reg [TOTAL_DEPTH-1:0][DATAW-1:0] entries;
|
||||
reg [TOTAL_DEPTH-1:0][DATAW-1:0] entries;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < DATAW; ++i) begin
|
||||
if ((i >= (DATAW-RESETW)) && reset) begin
|
||||
for (integer j = 0; j < TOTAL_DEPTH; ++j)
|
||||
entries[j][i] <= 0;
|
||||
end else if (enable) begin
|
||||
for (integer j = 1; j < TOTAL_DEPTH; ++j)
|
||||
entries[j-1][i] <= entries[j][i];
|
||||
entries[TOTAL_DEPTH-1][i] <= data_in[i];
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < DATAW; ++i) begin
|
||||
if ((i >= (DATAW-RESETW)) && reset) begin
|
||||
for (integer j = 0; j < TOTAL_DEPTH; ++j)
|
||||
entries[j][i] <= 0;
|
||||
end else if (enable) begin
|
||||
for (integer j = 1; j < TOTAL_DEPTH; ++j)
|
||||
entries[j-1][i] <= entries[j][i];
|
||||
entries[TOTAL_DEPTH-1][i] <= data_in[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NTAPS; ++i) begin
|
||||
assign data_out[i] = entries[i*DEPTH];
|
||||
for (genvar i = 0; i < NTAPS; ++i) begin
|
||||
assign data_out[i] = entries[i*DEPTH];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (enable)
|
||||
for (genvar i = 0; i < NTAPS; ++i) begin
|
||||
assign data_out[i] = data_in;
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -62,7 +62,7 @@ module VX_raster_edge #(
|
|||
VX_shift_register #(
|
||||
.DATAW (3 * `RASTER_DATA_BITS),
|
||||
.DEPTH (LATENCY)
|
||||
) shift_reg (
|
||||
) shift_reg1 (
|
||||
.clk (clk),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (enable),
|
||||
|
@ -76,12 +76,12 @@ module VX_raster_edge #(
|
|||
assign result_s[i] = sum[`RASTER_DATA_BITS-1:0];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
VX_shift_register #(
|
||||
.DATAW (3 * `RASTER_DATA_BITS),
|
||||
.DEPTH (LATENCY - `LATENCY_IMUL)
|
||||
) pipe_reg (
|
||||
) shift_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (enable),
|
||||
.data_in (result_s),
|
||||
.data_out (result)
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_rop_blend #(
|
|||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam LATENCY = `LATENCY_IMUL + 2;
|
||||
localparam LATENCY = 3;
|
||||
|
||||
`UNUSED_VAR (dcrs)
|
||||
|
||||
|
|
|
@ -1,19 +1,5 @@
|
|||
`include "VX_rop_define.vh"
|
||||
|
||||
`define MULT8(clk, en, dst, src1, src2) \
|
||||
VX_multiplier #( \
|
||||
.A_WIDTH (8), \
|
||||
.B_WIDTH (8), \
|
||||
.R_WIDTH (16), \
|
||||
.LATENCY (`LATENCY_IMUL) \
|
||||
) __``dst ( \
|
||||
.clk (clk), \
|
||||
.enable (en), \
|
||||
.dataa (src1), \
|
||||
.datab (src2), \
|
||||
.result (dst) \
|
||||
)
|
||||
|
||||
module VX_rop_blend_multadd #(
|
||||
parameter LATENCY = 1
|
||||
) (
|
||||
|
@ -34,71 +20,62 @@ module VX_rop_blend_multadd #(
|
|||
output rgba_t color_out
|
||||
);
|
||||
|
||||
`STATIC_ASSERT((LATENCY > `LATENCY_IMUL), ("invalid parameter"))
|
||||
`STATIC_ASSERT((LATENCY == 3), ("invalid parameter"))
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam LATENCY_REM = LATENCY - `LATENCY_IMUL;
|
||||
// multiply-add
|
||||
|
||||
wire [15:0] prod_src_r, prod_src_g, prod_src_b, prod_src_a;
|
||||
wire [15:0] prod_dst_r, prod_dst_g, prod_dst_b, prod_dst_a;
|
||||
reg [15:0] prod_src_r, prod_src_g, prod_src_b, prod_src_a;
|
||||
reg [15:0] prod_dst_r, prod_dst_g, prod_dst_b, prod_dst_a;
|
||||
reg [16:0] sum_r, sum_g, sum_b, sum_a;
|
||||
|
||||
// src_color x src_factor
|
||||
`MULT8(clk, enable, prod_src_r, src_color.r, src_factor.r);
|
||||
`MULT8(clk, enable, prod_src_g, src_color.g, src_factor.g);
|
||||
`MULT8(clk, enable, prod_src_b, src_color.b, src_factor.b);
|
||||
`MULT8(clk, enable, prod_src_a, src_color.a, src_factor.a);
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
prod_src_r <= src_color.r * src_factor.r;
|
||||
prod_src_g <= src_color.g * src_factor.g;
|
||||
prod_src_b <= src_color.b * src_factor.b;
|
||||
prod_src_a <= src_color.a * src_factor.a;
|
||||
|
||||
// dst_color x dst_factor
|
||||
`MULT8(clk, enable, prod_dst_r, dst_color.r, dst_factor.r);
|
||||
`MULT8(clk, enable, prod_dst_g, dst_color.g, dst_factor.g);
|
||||
`MULT8(clk, enable, prod_dst_b, dst_color.b, dst_factor.b);
|
||||
`MULT8(clk, enable, prod_dst_a, dst_color.a, dst_factor.a);
|
||||
prod_dst_r <= dst_color.r * dst_factor.r;
|
||||
prod_dst_g <= dst_color.g * dst_factor.g;
|
||||
prod_dst_b <= dst_color.b * dst_factor.b;
|
||||
prod_dst_a <= dst_color.a * dst_factor.a;
|
||||
|
||||
reg [16:0] sum_r, sum_g, sum_b, sum_a;
|
||||
|
||||
// apply blend mode
|
||||
always @(*) begin
|
||||
case (mode_rgb)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_r = prod_src_r + prod_dst_r + 16'hff;
|
||||
sum_g = prod_src_g + prod_dst_g + 16'hff;
|
||||
sum_b = prod_src_b + prod_dst_b + 16'hff;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_r = prod_src_r - prod_dst_r + 16'hff;
|
||||
sum_g = prod_src_g - prod_dst_g + 16'hff;
|
||||
sum_b = prod_src_b - prod_dst_b + 16'hff;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_r = prod_dst_r - prod_src_r + 16'hff;
|
||||
sum_g = prod_dst_g - prod_src_g + 16'hff;
|
||||
sum_b = prod_dst_b - prod_src_b + 16'hff;
|
||||
end
|
||||
default: begin
|
||||
sum_r = 'x;
|
||||
sum_g = 'x;
|
||||
sum_b = 'x;
|
||||
end
|
||||
endcase
|
||||
case (mode_a)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_a = prod_src_a + prod_dst_a + 16'hff;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_a = prod_src_a - prod_dst_a + 16'hff;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_a = prod_dst_a - prod_src_a + 16'hff;
|
||||
end
|
||||
default: begin
|
||||
sum_a = 'x;
|
||||
end
|
||||
endcase
|
||||
case (mode_rgb)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_r <= prod_src_r + prod_dst_r + 16'h80;
|
||||
sum_g <= prod_src_g + prod_dst_g + 16'h80;
|
||||
sum_b <= prod_src_b + prod_dst_b + 16'h80;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_r <= prod_src_r - prod_dst_r + 16'h80;
|
||||
sum_g <= prod_src_g - prod_dst_g + 16'h80;
|
||||
sum_b <= prod_src_b - prod_dst_b + 16'h80;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_r <= prod_dst_r - prod_src_r + 16'h80;
|
||||
sum_g <= prod_dst_g - prod_src_g + 16'h80;
|
||||
sum_b <= prod_dst_b - prod_src_b + 16'h80;
|
||||
end
|
||||
endcase
|
||||
case (mode_a)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
sum_a <= prod_src_a + prod_dst_a + 16'h80;
|
||||
end
|
||||
`ROP_BLEND_MODE_SUB: begin
|
||||
sum_a <= prod_src_a - prod_dst_a + 16'h80;
|
||||
end
|
||||
`ROP_BLEND_MODE_REV_SUB: begin
|
||||
sum_a <= prod_dst_a - prod_src_a + 16'h80;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// clamp to (0, 255 * 256)
|
||||
|
||||
reg [15:0] clamp_r, clamp_g, clamp_b, clamp_a;
|
||||
|
||||
// clamp to (0, 255 * 256)
|
||||
always @(*) begin
|
||||
case (mode_rgb)
|
||||
`ROP_BLEND_MODE_ADD: begin
|
||||
|
@ -132,18 +109,17 @@ module VX_rop_blend_multadd #(
|
|||
endcase
|
||||
end
|
||||
|
||||
rgba_t result;
|
||||
// divide by 255
|
||||
|
||||
// divide by 255
|
||||
rgba_t result;
|
||||
assign result.r = 8'((clamp_r + (clamp_r >> 8)) >> 8);
|
||||
assign result.g = 8'((clamp_g + (clamp_g >> 8)) >> 8);
|
||||
assign result.b = 8'((clamp_b + (clamp_b >> 8)) >> 8);
|
||||
assign result.a = 8'((clamp_a + (clamp_a >> 8)) >> 8);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (32),
|
||||
.DEPTH (LATENCY_REM)
|
||||
) shift_reg (
|
||||
VX_pipe_register #(
|
||||
.DATAW (32)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
`UNUSED_PIN (reset),
|
||||
.enable (enable),
|
||||
|
|
|
@ -13,7 +13,7 @@ module VX_tex_lerp (
|
|||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [15:0] p1, p2;
|
||||
reg [16:0] sum;
|
||||
reg [15:0] sum;
|
||||
reg [7:0] res;
|
||||
|
||||
wire [7:0] sub = (8'hff - frac);
|
||||
|
@ -22,7 +22,7 @@ module VX_tex_lerp (
|
|||
if (enable) begin
|
||||
p1 <= in1 * sub;
|
||||
p2 <= in2 * frac;
|
||||
sum <= p1 + p2 + 17'h80;
|
||||
sum <= p1 + p2 + 16'h80;
|
||||
res <= 8'((sum + (sum >> 8)) >> 8);
|
||||
end
|
||||
end
|
||||
|
|
|
@ -45,7 +45,6 @@ module VX_tex_sampler #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + REQ_INFOW + (NUM_LANES * 2 * `TEX_BLEND_FRAC) + (NUM_LANES * 4 * 32)),
|
||||
.DEPTH (1),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue