mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 05:17:45 -04:00
draw3d test updates
This commit is contained in:
parent
86d43efcac
commit
0deacb46c5
22 changed files with 536 additions and 200 deletions
|
@ -381,8 +381,8 @@
|
|||
`define TEX_FXD_FRAC (`TEX_DIM_BITS+`TEX_SUBPIXEL_BITS)
|
||||
|
||||
`define TEX_STATE_ADDR 0
|
||||
`define TEX_STATE_WIDTH 1
|
||||
`define TEX_STATE_HEIGHT 2
|
||||
`define TEX_STATE_LOGWIDTH 1
|
||||
`define TEX_STATE_LOGHEIGHT 2
|
||||
`define TEX_STATE_FORMAT 3
|
||||
`define TEX_STATE_FILTER 4
|
||||
`define TEX_STATE_WRAPU 5
|
||||
|
@ -393,8 +393,8 @@
|
|||
`define CSR_TEX_STATE_BEGIN 12'h7C0
|
||||
`define CSR_TEX_STAGE `CSR_TEX_STATE_BEGIN
|
||||
`define CSR_TEX_ADDR (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_ADDR)
|
||||
`define CSR_TEX_WIDTH (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_WIDTH)
|
||||
`define CSR_TEX_HEIGHT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_HEIGHT)
|
||||
`define CSR_TEX_LOGWIDTH (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_LOGWIDTH)
|
||||
`define CSR_TEX_LOGHEIGHT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_LOGHEIGHT)
|
||||
`define CSR_TEX_FORMAT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_FORMAT)
|
||||
`define CSR_TEX_FILTER (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_FILTER)
|
||||
`define CSR_TEX_WRAPU (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_WRAPU)
|
||||
|
@ -406,21 +406,21 @@
|
|||
|
||||
// Raster Units ///////////////////////////////////////////////////////////////
|
||||
|
||||
`define RASTER_STATE_PIDX_ADDR 0
|
||||
`define RASTER_STATE_PIDX_SIZE 1
|
||||
`define RASTER_DIM_BITS 15
|
||||
|
||||
`define RASTER_STATE_TBUF_ADDR 0
|
||||
`define RASTER_STATE_TILE_COUNT 1
|
||||
`define RASTER_STATE_PBUF_ADDR 2
|
||||
`define RASTER_STATE_PBUF_STRIDE 3
|
||||
`define RASTER_STATE_TILE_XY 4
|
||||
`define RASTER_STATE_TILE_WH 5
|
||||
`define RASTER_STATE_COUNT 6
|
||||
`define RASTER_STATE_TILE_LOGSIZE 4
|
||||
`define RASTER_STATE_COUNT 5
|
||||
|
||||
`define CSR_RASTER_STATE_BEGIN `CSR_TEX_STATE_END
|
||||
`define CSR_RASTER_PIDX_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PIDX_ADDR)
|
||||
`define CSR_RASTER_PIDX_SIZE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PIDX_SIZE)
|
||||
`define CSR_RASTER_TBUF_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TBUF_ADDR)
|
||||
`define CSR_RASTER_TILE_COUNT (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_COUNT)
|
||||
`define CSR_RASTER_PBUF_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PBUF_ADDR)
|
||||
`define CSR_RASTER_PBUF_STRIDE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PBUF_STRIDE)
|
||||
`define CSR_RASTER_TILE_XY (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_XY)
|
||||
`define CSR_RASTER_TILE_WH (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_WH)
|
||||
`define CSR_RASTER_TILE_LOGSIZE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_LOGSIZE)
|
||||
`define CSR_RASTER_STATE_END (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_COUNT)
|
||||
|
||||
`define CSR_RASTER_STATE(addr) ((addr) - `CSR_RASTER_STATE_BEGIN)
|
||||
|
|
|
@ -22,11 +22,11 @@ module VX_raster_csr (
|
|||
csrs <= 0;
|
||||
end else if (csr_wr_valid) begin
|
||||
case (csr_wr_addr)
|
||||
`CSR_RASTER_PIDX_ADDR: begin
|
||||
csrs.pidx_addr <= csr_wr_data[31:0];
|
||||
`CSR_RASTER_TBUF_ADDR: begin
|
||||
csrs.tbuf_addr <= csr_wr_data[31:0];
|
||||
end
|
||||
`CSR_RASTER_PIDX_SIZE: begin
|
||||
csrs.pidx_size <= csr_wr_data[31:0];
|
||||
`CSR_RASTER_TILE_COUNT: begin
|
||||
csrs.tile_count <= csr_wr_data[31:0];
|
||||
end
|
||||
`CSR_RASTER_PBUF_ADDR: begin
|
||||
csrs.pbuf_addr <= csr_wr_data[31:0];
|
||||
|
@ -34,13 +34,8 @@ module VX_raster_csr (
|
|||
`CSR_RASTER_PBUF_STRIDE: begin
|
||||
csrs.pbuf_stride <= csr_wr_data[31:0];
|
||||
end
|
||||
`CSR_RASTER_TILE_XY: begin
|
||||
csrs.tile_left <= csr_wr_data[15:0];
|
||||
csrs.tile_top <= csr_wr_data[31:16];
|
||||
end
|
||||
`CSR_RASTER_TILE_WH: begin
|
||||
csrs.tile_width <= csr_wr_data[15:0];
|
||||
csrs.tile_height <= csr_wr_data[31:16];
|
||||
`CSR_RASTER_TILE_LOGSIZE: begin
|
||||
csrs.tile_logsize <= csr_wr_data[15:0];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -9,12 +9,11 @@ task trace_raster_state (
|
|||
input [`CSR_ADDR_BITS-1:0] state
|
||||
);
|
||||
case (state)
|
||||
`CSR_RASTER_PIDX_ADDR: dpi_trace("PIDX_ADDR");
|
||||
`CSR_RASTER_PIDX_SIZE: dpi_trace("PIDX_SIZE");
|
||||
`CSR_RASTER_TBUF_ADDR: dpi_trace("TBUF_ADDR");
|
||||
`CSR_RASTER_TILE_COUNT: dpi_trace("TILE_COUNT");
|
||||
`CSR_RASTER_PBUF_ADDR: dpi_trace("PBUF_ADDR");
|
||||
`CSR_RASTER_PBUF_STRIDE: dpi_trace("PBUF_STRIDE");
|
||||
`CSR_RASTER_TILE_XY: dpi_trace("TILE_XY");
|
||||
`CSR_RASTER_TILE_WH: dpi_trace("TILE_WH");
|
||||
// Trace label for the tile log-size raster CSR (closing quote was missing).
`CSR_RASTER_TILE_LOGSIZE: dpi_trace("TILE_LOGSIZE");
|
||||
default: dpi_trace("??");
|
||||
endcase
|
||||
endtask
|
||||
|
|
|
@ -3,17 +3,16 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define RASTER_DIM_BITS 15
|
||||
|
||||
package raster_types;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] pidx_addr;
|
||||
logic [31:0] pidx_size;
|
||||
logic [31:0] tbuf_addr;
|
||||
logic [31:0] tile_count;
|
||||
logic [31:0] pbuf_addr;
|
||||
logic [31:0] pbuf_stride;
|
||||
logic [15:0] tile_left;
|
||||
logic [15:0] tile_top;
|
||||
logic [15:0] tile_width;
|
||||
logic [15:0] tile_height;
|
||||
logic [15:0] tile_logsize;
|
||||
} raster_csrs_t;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -60,10 +60,10 @@ module VX_tex_csr #(
|
|||
`CSR_TEX_FILTER: begin
|
||||
tex_filter[csr_tex_stage] <= csr_wr_data[`TEX_FILTER_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_WIDTH: begin
|
||||
`CSR_TEX_LOGWIDTH: begin
|
||||
tex_logdims[csr_tex_stage][0] <= csr_wr_data[`TEX_LOD_BITS-1:0];
|
||||
end
|
||||
`CSR_TEX_HEIGHT: begin
|
||||
`CSR_TEX_LOGHEIGHT: begin
|
||||
tex_logdims[csr_tex_stage][1] <= csr_wr_data[`TEX_LOD_BITS-1:0];
|
||||
end
|
||||
default: begin
|
||||
|
|
|
@ -36,15 +36,15 @@ task trace_tex_state (
|
|||
input [`CSR_ADDR_BITS-1:0] state
|
||||
);
|
||||
case (state)
|
||||
`CSR_TEX_ADDR: dpi_trace("ADDR");
|
||||
`CSR_TEX_WIDTH: dpi_trace("WIDTH");
|
||||
`CSR_TEX_HEIGHT: dpi_trace("HEIGHT");
|
||||
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
|
||||
`CSR_TEX_FILTER: dpi_trace("FILTER");
|
||||
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
|
||||
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
|
||||
`CSR_TEX_ADDR: dpi_trace("ADDR");
|
||||
`CSR_TEX_LOGWIDTH: dpi_trace("LOGWIDTH");
|
||||
`CSR_TEX_LOGHEIGHT: dpi_trace("LOGHEIGHT");
|
||||
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
|
||||
`CSR_TEX_FILTER: dpi_trace("FILTER");
|
||||
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
|
||||
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
|
||||
//`CSR_TEX_MIPOFF:
|
||||
default: dpi_trace("MIPOFF");
|
||||
default: dpi_trace("MIPOFF");
|
||||
endcase
|
||||
endtask
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cocogfx/include/fixed.h>
|
||||
#include <cocogfx/include/fixed.hpp>
|
||||
#include <bitmanip.h>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
@ -23,11 +23,11 @@ enum class TexFormat {
|
|||
};
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
T Clamp(Fixed<F,T> fx, WrapMode mode) {
|
||||
T Clamp(TFixed<F,T> fx, WrapMode mode) {
|
||||
switch (mode) {
|
||||
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > Fixed<F,T>::MASK) ? Fixed<F,T>::MASK : fx.data());
|
||||
case WrapMode::Repeat: return (fx.data() & Fixed<F,T>::MASK);
|
||||
case WrapMode::Mirror: return (bit_get(fx.data(), Fixed<F,T>::FRAC) ? ~fx.data() : fx.data());
|
||||
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > TFixed<F,T>::MASK) ? TFixed<F,T>::MASK : fx.data());
|
||||
case WrapMode::Repeat: return (fx.data() & TFixed<F,T>::MASK);
|
||||
case WrapMode::Mirror: return (bit_get(fx.data(), TFixed<F,T>::FRAC) ? ~fx.data() : fx.data());
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
|
@ -121,8 +121,8 @@ inline uint32_t Lerp8888(uint32_t a, uint32_t b, uint32_t f) {
|
|||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressLinear(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
void TexAddressLinear(TFixed<F,T> fu,
|
||||
TFixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
|
@ -134,16 +134,16 @@ void TexAddressLinear(Fixed<F,T> fu,
|
|||
uint32_t* alpha,
|
||||
uint32_t* beta
|
||||
) {
|
||||
auto delta_x = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_width);
|
||||
auto delta_y = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_height);
|
||||
auto delta_x = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_width);
|
||||
auto delta_y = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_height);
|
||||
|
||||
uint32_t u0 = Clamp(fu - delta_x, wrapu);
|
||||
uint32_t u1 = Clamp(fu + delta_x, wrapu);
|
||||
uint32_t v0 = Clamp(fv - delta_y, wrapv);
|
||||
uint32_t v1 = Clamp(fv + delta_y, wrapv);
|
||||
|
||||
uint32_t shift_u = (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t shift_v = (Fixed<F,T>::FRAC - log_height);
|
||||
uint32_t shift_u = (TFixed<F,T>::FRAC - log_width);
|
||||
uint32_t shift_v = (TFixed<F,T>::FRAC - log_height);
|
||||
|
||||
uint32_t x0s = (u0 << 8) >> shift_u;
|
||||
uint32_t y0s = (v0 << 8) >> shift_v;
|
||||
|
@ -165,8 +165,8 @@ void TexAddressLinear(Fixed<F,T> fu,
|
|||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressPoint(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
void TexAddressPoint(TFixed<F,T> fu,
|
||||
TFixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
|
@ -176,8 +176,8 @@ void TexAddressPoint(Fixed<F,T> fu,
|
|||
uint32_t u = Clamp(fu, wrapu);
|
||||
uint32_t v = Clamp(fv, wrapv);
|
||||
|
||||
uint32_t x = u >> (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t y = v >> (Fixed<F,T>::FRAC - log_height);
|
||||
uint32_t x = u >> (TFixed<F,T>::FRAC - log_width);
|
||||
uint32_t y = v >> (TFixed<F,T>::FRAC - log_height);
|
||||
|
||||
*addr = x + (y << log_width);
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ RTL_DIR = ../../hw/rtl
|
|||
DPI_DIR = ../../hw/dpi
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=C++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
|
|
|
@ -46,11 +46,11 @@ public:
|
|||
|
||||
uint32_t read(uint32_t stage, int32_t u, int32_t v, int32_t lod, TraceData* trace_data) {
|
||||
auto& states = core_->global_csrs_.tex_csrs.at(stage);
|
||||
auto xu = Fixed<TEX_FXD_FRAC>::make(u);
|
||||
auto xv = Fixed<TEX_FXD_FRAC>::make(v);
|
||||
auto xu = TFixed<TEX_FXD_FRAC>::make(u);
|
||||
auto xv = TFixed<TEX_FXD_FRAC>::make(v);
|
||||
auto base_addr = states.at(TEX_STATE_ADDR) + states.at(TEX_STATE_MIPOFF(lod));
|
||||
auto log_width = std::max<int32_t>(states.at(TEX_STATE_WIDTH) - lod, 0);
|
||||
auto log_height = std::max<int32_t>(states.at(TEX_STATE_HEIGHT) - lod, 0);
|
||||
auto log_width = std::max<int32_t>(states.at(TEX_STATE_LOGWIDTH) - lod, 0);
|
||||
auto log_height = std::max<int32_t>(states.at(TEX_STATE_LOGHEIGHT) - lod, 0);
|
||||
auto format = (TexFormat)states.at(TEX_STATE_FORMAT);
|
||||
auto filter = (FilterMode)states.at(TEX_STATE_FILTER);
|
||||
auto wrapu = (WrapMode)states.at(TEX_STATE_WRAPU);
|
||||
|
|
|
@ -11,14 +11,14 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -std=c++14 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
VX_SRCS = kernel.cpp
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
|
|
|
@ -7,33 +7,35 @@
|
|||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
typedef struct {
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float w;
|
||||
uint32_t c;
|
||||
float u;
|
||||
float v;
|
||||
} vtx_t;
|
||||
float u;
|
||||
float v;
|
||||
} vertex_t;
|
||||
|
||||
typedef struct {
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float w;
|
||||
} rast_vtx_t;
|
||||
|
||||
typedef struct {
|
||||
vtx_t v0;
|
||||
vtx_t v1;
|
||||
vtx_t v2;
|
||||
} prim_t;
|
||||
rast_vtx_t v0;
|
||||
rast_vtx_t v1;
|
||||
rast_vtx_t v2;
|
||||
} rast_prim_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t top;
|
||||
uint32_t left;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t num_prims;
|
||||
uint32_t* indices;
|
||||
} tile_t;
|
||||
uint32_t tileXY;
|
||||
uint32_t num_prims;
|
||||
} rast_tile_header_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t num_tiles;
|
||||
uint32_t tiles_addr;
|
||||
uint32_t prims_addr;
|
||||
uint32_t vts_addr;
|
||||
uint32_t dst_addr;
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
|
|
|
@ -14,17 +14,6 @@ void kernel_body(int task_id, tile_arg_t* arg) {
|
|||
int main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
// configure raster unit
|
||||
|
||||
// configure rop unit
|
||||
|
||||
tile_arg_t targ;
|
||||
targ.state = arg;
|
||||
|
||||
vx_spawn_tasks(arg->num_tiles, (vx_spawn_tasks_cb)kernel_body, &targ);
|
||||
/*for (uint32_t t=0; t < arg->num_tiles; ++t) {
|
||||
kernel_body(t, &targ);
|
||||
}*/
|
||||
|
||||
// TODO
|
||||
return 0;
|
||||
}
|
|
@ -4,10 +4,12 @@
|
|||
#include <string.h>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <array>
|
||||
#include <assert.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
#include "utils.h"
|
||||
#include "model_quad.h"
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
|
@ -24,35 +26,48 @@ using namespace cocogfx;
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
const char* input_file = "soccer.png";
|
||||
const char* output_file = "output.png";
|
||||
uint32_t dst_width = 64;
|
||||
ePixelFormat src_format = FORMAT_A8R8G8B8;
|
||||
int src_wrap = 0;
|
||||
int src_filter = 0; // 0-> point, 1->bilinear
|
||||
uint32_t dst_width = 64;
|
||||
uint32_t dst_height = 64;
|
||||
uint32_t tile_size = 64;
|
||||
const model_t& model = model_quad;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
uint64_t tilebuf_addr;
|
||||
uint64_t primbuf_addr;
|
||||
uint64_t srcbuf_addr;
|
||||
uint64_t dstbuf_addr;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex 3D Rendering Test." << std::endl;
|
||||
std::cout << "Usage: [-k: kernel] [-o image] [-u width] [-v height] [-h: help]" << std::endl;
|
||||
std::cout << "Usage: [-i texture] [-o output] [-w width] [-h height] [-t tilesize]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "k:o:u:v:h?")) != -1) {
|
||||
// Option string must list every flag handled by the switch below:
// -i <texture>, -o <output>, -w <width>, -h <height>, -t <tilesize>.
while ((c = getopt(argc, argv, "i:o:w:h:t:?")) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
input_file = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
output_file = optarg;
|
||||
break;
|
||||
case 'u':
|
||||
case 'w':
|
||||
dst_width = std::atoi(optarg);
|
||||
break;
|
||||
case 'v':
|
||||
case 'h':
|
||||
dst_height = std::atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
case 't':
|
||||
tile_size = std::atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
|
@ -69,19 +84,18 @@ void cleanup() {
|
|||
vx_buf_free(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_mem_free(device, kernel_arg.tiles_addr);
|
||||
vx_mem_free(device, kernel_arg.prims_addr);
|
||||
vx_mem_free(device, kernel_arg.dst_addr);
|
||||
vx_mem_free(device, tilebuf_addr);
|
||||
vx_mem_free(device, primbuf_addr);
|
||||
vx_mem_free(device, srcbuf_addr);
|
||||
vx_mem_free(device, dstbuf_addr);
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp) {
|
||||
(void)bpp;
|
||||
int render(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// start device
|
||||
|
@ -107,31 +121,49 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
}
|
||||
|
||||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, bpp);
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, 4);
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void allocate_tiles() {
|
||||
// TODO
|
||||
}
|
||||
int main(int argc, char *argv[]) {
|
||||
std::vector<uint8_t> tilebuf;
|
||||
std::vector<uint8_t> primbuf;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::vector<tile_t> tiles;
|
||||
std::vector<prim_t> primitives;
|
||||
std::vector<uint8_t> srcbuf;
|
||||
std::vector<uint32_t> mip_offsets;
|
||||
uint32_t src_width;
|
||||
uint32_t src_height;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t dst_bpp = 4;
|
||||
uint32_t dst_bufsize = dst_bpp * dst_width * dst_height;
|
||||
if (!ispow2(tile_size)) {
|
||||
std::cout << "Error: only power of two tile_size supported: tile_size=" << tile_size << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
allocate_tiles();
|
||||
|
||||
uint32_t tile_bufsize = tiles.size() * sizeof(tile_t);
|
||||
uint32_t prim_bufsize = primitives.size() * sizeof(prim_t);
|
||||
if (!ispow2(dst_width)) {
|
||||
std::cout << "Error: only power of two dst_width supported: dst_width=" << dst_width << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!ispow2(dst_height)) {
|
||||
std::cout << "Error: only power of two dst_height supported: dst_height=" << dst_height << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != (dst_width % tile_size)) {
|
||||
std::cout << "Error: dst_with must be divisible by tile_size" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != (dst_height % tile_size)) {
|
||||
std::cout << "Error: dst_height must be divisible by tile_size" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
|
@ -144,36 +176,50 @@ int main(int argc, char *argv[]) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
uint64_t max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
{
|
||||
std::vector<uint8_t> staging;
|
||||
RT_CHECK(LoadImage(input_file, src_format, staging, &src_width, &src_height));
|
||||
|
||||
// check power of two support
|
||||
if (!ispow2(src_width) || !ispow2(src_height)) {
|
||||
std::cout << "Error: only power of two textures supported: width=" << src_width << ", heigth=" << src_height << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
RT_CHECK(GenerateMipmaps(srcbuf, mip_offsets, staging, src_format, src_width, src_height, src_width * 4));
|
||||
}
|
||||
|
||||
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
|
||||
std::cout << "destination staging_buf: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
|
||||
uint32_t src_logwidth = log2ceil(src_width);
|
||||
uint32_t src_logheight = log2ceil(src_height);
|
||||
|
||||
uint32_t dstbuf_size = dst_width * dst_height * 4;
|
||||
|
||||
uint32_t logTileSize = log2ceil(tile_size);
|
||||
|
||||
// Perform tile binning
|
||||
auto num_tiles = Binning(tilebuf, primbuf, model, dst_width, dst_height, tile_size);
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
uint64_t tile_addr, prim_addr, dst_addr;
|
||||
RT_CHECK(vx_mem_alloc(device, tile_bufsize, &tile_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, prim_bufsize, &prim_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr));
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, tilebuf.size(), &tilebuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, primbuf.size(), &primbuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, srcbuf.size(), &srcbuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, dstbuf_size, &dstbuf_addr));
|
||||
|
||||
std::cout << "tile_addr=0x" << std::hex << tile_addr << std::endl;
|
||||
std::cout << "prim_addr=0x" << std::hex << prim_addr << std::endl;
|
||||
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
||||
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::endl;
|
||||
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::endl;
|
||||
std::cout << "srcbuf_addr=0x" << std::hex << srcbuf_addr << std::endl;
|
||||
std::cout << "dstbuf_addr=0x" << std::hex << dstbuf_addr << std::endl;
|
||||
|
||||
// allocate staging shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
|
||||
std::max<uint32_t>(tile_bufsize,
|
||||
std::max<uint32_t>(prim_bufsize, dst_bufsize)));
|
||||
std::max<uint32_t>(tilebuf.size(),
|
||||
std::max<uint32_t>(primbuf.size(), dstbuf_size)));
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
|
@ -181,9 +227,9 @@ int main(int argc, char *argv[]) {
|
|||
{
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = dst_bpp;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width;
|
||||
kernel_arg.dst_addr = dst_addr;
|
||||
kernel_arg.dst_stride = 4;
|
||||
kernel_arg.dst_pitch = 4 * dst_width;
|
||||
kernel_arg.dst_addr = dstbuf_addr;
|
||||
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
|
@ -193,36 +239,55 @@ int main(int argc, char *argv[]) {
|
|||
// upload tiles buffer
|
||||
std::cout << "upload tiles buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (tile_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < tiles.size(); ++i) {
|
||||
buf_ptr[i] = tiles.at(i);
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.tiles_addr, tile_bufsize, 0));
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, tilebuf.data(), tilebuf.size());
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, tilebuf_addr, tilebuf.size(), 0));
|
||||
}
|
||||
|
||||
// upload primitives buffer
|
||||
std::cout << "upload primitives buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (prim_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < primitives.size(); ++i) {
|
||||
buf_ptr[i] = primitives.at(i);
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.prims_addr, prim_bufsize, 0));
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, primbuf.data(), primbuf.size());
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, primbuf_addr, primbuf.size(), 0));
|
||||
}
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (uint32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
|
||||
for (uint32_t i = 0; i < (dstbuf_size/4); ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_bufsize, 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dstbuf_size, 0));
|
||||
}
|
||||
|
||||
// configure texture units
|
||||
vx_csr_write(device, CSR_TEX_STAGE, 0);
|
||||
vx_csr_write(device, CSR_TEX_LOGWIDTH, src_logwidth);
|
||||
vx_csr_write(device, CSR_TEX_LOGHEIGHT, src_logheight);
|
||||
vx_csr_write(device, CSR_TEX_FORMAT, src_format);
|
||||
vx_csr_write(device, CSR_TEX_WRAPU, src_wrap);
|
||||
vx_csr_write(device, CSR_TEX_WRAPV, src_wrap);
|
||||
vx_csr_write(device, CSR_TEX_FILTER, src_filter);
|
||||
vx_csr_write(device, CSR_TEX_ADDR, srcbuf_addr);
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
vx_csr_write(device, CSR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
};
|
||||
|
||||
// configure raster units
|
||||
vx_csr_write(device, CSR_RASTER_TBUF_ADDR, tilebuf_addr);
|
||||
vx_csr_write(device, CSR_RASTER_TILE_COUNT, num_tiles);
|
||||
vx_csr_write(device, CSR_RASTER_PBUF_ADDR, primbuf_addr);
|
||||
vx_csr_write(device, CSR_RASTER_PBUF_STRIDE, sizeof(rast_prim_t));
|
||||
vx_csr_write(device, CSR_RASTER_TILE_LOGSIZE, logTileSize);
|
||||
|
||||
// configure rop units
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));
|
||||
std::cout << "render" << std::endl;
|
||||
RT_CHECK(render(kernel_arg, dstbuf_size, dst_width, dst_height));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
|
|
15
tests/regression/draw3d/model.h
Normal file
15
tests/regression/draw3d/model.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include <vector>
|
||||
|
||||
// One triangle of a model, stored as three vertex indices.
// Binning() resolves each index with model.vertives.at(iN).
typedef struct {
|
||||
// index of the first vertex
uint32_t i0;
|
||||
// index of the second vertex
uint32_t i1;
|
||||
// index of the third vertex
uint32_t i2;
|
||||
} primitive_t;
|
||||
|
||||
// Indexed triangle model: a vertex list plus the primitives that index into it.
struct model_t {
|
||||
// Vertex list. NOTE(review): the name is a misspelling of "vertices", but
// Binning() accesses it as `vertives`, so a rename must update users too.
std::vector<vertex_t> vertives;
|
||||
// Triangles; each entry holds three indices into the vertex list above.
std::vector<primitive_t> primitives;
|
||||
};
|
55
tests/regression/draw3d/model_quad.h
Normal file
55
tests/regression/draw3d/model_quad.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
#pragma once
|
||||
|
||||
#include "model.h"
|
||||
|
||||
const model_t model_quad = {
|
||||
{
|
||||
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
|
||||
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
|
||||
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
|
||||
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
|
||||
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
|
||||
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
|
||||
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
|
||||
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
|
||||
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
|
||||
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
|
||||
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000}
|
||||
}, {
|
||||
{2, 1, 3},
|
||||
{3, 1, 0},
|
||||
{1, 2, 0},
|
||||
{0, 2, 3},
|
||||
{4, 0, 5},
|
||||
{5, 0, 1},
|
||||
{1, 5, 0},
|
||||
{0, 5, 4},
|
||||
{3, 0, 7},
|
||||
{7, 0, 4},
|
||||
{5, 4, 1},
|
||||
{1, 4, 0}
|
||||
}
|
||||
};
|
BIN
tests/regression/draw3d/soccer.png
Normal file
BIN
tests/regression/draw3d/soccer.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.4 KiB |
|
@ -3,28 +3,240 @@
|
|||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <cocogfx/include/tga.h>
|
||||
#include <cocogfx/include/png.h>
|
||||
#include <string.h>
|
||||
#include <unordered_map>
|
||||
#include <cocogfx/include/tga.hpp>
|
||||
#include <cocogfx/include/png.hpp>
|
||||
#include <cocogfx/include/fixed.hpp>
|
||||
#include <cocogfx/include/math.hpp>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
using fixed16_t = TFixed<16>;
|
||||
|
||||
using vec2d_f_t = TVector2<float>;
|
||||
using vec2d_fx_t = TVector2<fixed16_t>;
|
||||
|
||||
using vec3d_fx_t = TVector3<fixed16_t>;
|
||||
|
||||
using vec4d_f_t = TVector4<float>;
|
||||
|
||||
using rect_f_t = TRect<float>;
|
||||
using rect_u_t = TRect<uint32_t>;
|
||||
|
||||
static fixed16_t fxZero(0);
|
||||
static fixed16_t fxHalf(0.5f);
|
||||
|
||||
// Evaluate edge function
|
||||
static fixed16_t evalEdgeFunction(const vec3d_fx_t& e, uint32_t x, uint32_t y) {
|
||||
return (e.x * x) + (e.y * y) + e.z;
|
||||
}
|
||||
|
||||
// Calculate the edge extents for tile corners
|
||||
static fixed16_t calcEdgeExtents(const vec3d_fx_t& e, uint32_t logTileSize) {
|
||||
vec2d_fx_t corners[4] = {{fxZero, fxZero}, // 00
|
||||
{e.x, fxZero}, // 10
|
||||
{fxZero, e.y}, // 01
|
||||
{e.x, e.y}}; // 11
|
||||
auto i = (e.y >= fxZero) ? ((e.x >= fxZero) ? 3 : 2) : (e.x >= fxZero) ? 1 : 0;
|
||||
return (corners[i].x + corners[i].y) << logTileSize;
|
||||
}
|
||||
|
||||
// Builds the three edge equations of a triangle from its vertices.
//
// edges      [out] receives the normalized (a, b, c) coefficients of each
//                  edge, converted to fixed16_t.
// v0, v1, v2 [in]  triangle vertices; the x, y and w components are read.
//
// Returns c0 * v0.w + c1 * v1.w + c2 * v2.w, computed from the normalized
// c coefficients.
static float EdgeEquation(vec3d_fx_t edges[3],
                          const vec4d_f_t& v0,
                          const vec4d_f_t& v1,
                          const vec4d_f_t& v2) {
  // Calculate edge equation matrix
  auto a0 = (v1.y * v2.w) - (v2.y * v1.w);
  auto a1 = (v2.y * v0.w) - (v0.y * v2.w);
  auto a2 = (v0.y * v1.w) - (v1.y * v0.w);

  auto b0 = (v2.x * v1.w) - (v1.x * v2.w);
  auto b1 = (v0.x * v2.w) - (v2.x * v0.w);
  auto b2 = (v1.x * v0.w) - (v0.x * v1.w);

  auto c0 = (v1.x * v2.y) - (v2.x * v1.y);
  auto c1 = (v2.x * v0.y) - (v0.x * v2.y);
  auto c2 = (v0.x * v1.y) - (v1.x * v0.y);

  // Normalize each row by |a| + |b|. A local lambda is used instead of a
  // function-like macro so that nothing leaks out of this header.
  // NOTE(review): when a and b are both zero the divisor is zero, exactly as
  // in the macro version; confirm whether callers can pass such triangles.
  const auto normalize = [](auto& a, auto& b, auto& c) {
    auto t = 1.0 / (std::abs(a) + std::abs(b));
    a *= t;
    b *= t;
    c *= t;
  };
  normalize(a0, b0, c0);
  normalize(a1, b1, c1);
  normalize(a2, b2, c2);

  // Convert the edge equation to fixedpoint
  edges[0] = {fixed16_t(a0), fixed16_t(b0), fixed16_t(c0)};
  edges[1] = {fixed16_t(a1), fixed16_t(b1), fixed16_t(c1)};
  edges[2] = {fixed16_t(a2), fixed16_t(b2), fixed16_t(c2)};

  /*printf("E0.x=%f, E0.y=%f, E0.z=%f, E1.x=%f, E1.y=%f, E1.z=%f, E2.x=%f, E2.y=%f, E2.z=%f\n",
      float(edges[0].x), float(edges[0].y), float(edges[0].z),
      float(edges[1].x), float(edges[1].y), float(edges[1].z),
      float(edges[2].x), float(edges[2].y), float(edges[2].z));*/

  auto det = c0 * v0.w + c1 * v1.w + c2 * v2.w;

  return det;
}
|
||||
|
||||
// traverse model primitives and do tile assignment
|
||||
uint32_t Binning(std::vector<uint8_t>& tilebuf,
|
||||
std::vector<uint8_t>& primbuf,
|
||||
const model_t& model,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t tileSize) {
|
||||
|
||||
uint32_t logTileSize = log2ceil(tileSize);
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint32_t>> tiles;
|
||||
|
||||
uint32_t num_prims = 0;
|
||||
|
||||
for (uint32_t p = 0; p < model.primitives.size(); ++p) {
|
||||
// get primitive vertices
|
||||
auto& primitive = model.primitives.at(p);
|
||||
auto& p0 = *(vec4d_f_t*)&model.vertives.at(primitive.i0);
|
||||
auto& p1 = *(vec4d_f_t*)&model.vertives.at(primitive.i1);
|
||||
auto& p2 = *(vec4d_f_t*)&model.vertives.at(primitive.i2);
|
||||
|
||||
vec3d_fx_t edges[3];
|
||||
rect_u_t bbox;
|
||||
|
||||
{
|
||||
// Convert position from clip to 2D homogenous device space
|
||||
vec4d_f_t v0, v1, v2;
|
||||
ClipTo2DH(&v0, p0, width, height);
|
||||
ClipTo2DH(&v1, p1, width, height);
|
||||
ClipTo2DH(&v2, p2, width, height);
|
||||
|
||||
// Calculate edge equation
|
||||
auto det = EdgeEquation(edges, v0, v1, v2);
|
||||
if (det <= 0) {
|
||||
// reject back-facing or degenerate triangles
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Convert position from clip to screen space
|
||||
vec4d_f_t v0, v1, v2;
|
||||
ClipToScreen(&v0, p0, width, height);
|
||||
ClipToScreen(&v1, p1, width, height);
|
||||
ClipToScreen(&v2, p2, width, height);
|
||||
|
||||
// Calculate bounding box
|
||||
rect_f_t tmp;
|
||||
CalcBoundingBox(&tmp, *(vec2d_f_t*)&v0, *(vec2d_f_t*)&v1, *(vec2d_f_t*)&v2);
|
||||
bbox.left = std::max<int32_t>(0, tmp.left);
|
||||
bbox.right = std::min<int32_t>(width, tmp.right);
|
||||
bbox.top = std::max<int32_t>(0, tmp.top);
|
||||
bbox.bottom = std::min<int32_t>(height, tmp.bottom);
|
||||
}
|
||||
|
||||
// Calculate min/max tile positions
|
||||
auto tileSize = 1 << logTileSize;
|
||||
auto minTileX = bbox.left >> logTileSize;
|
||||
auto minTileY = bbox.top >> logTileSize;
|
||||
auto maxTileX = (bbox.right + tileSize - 1) >> logTileSize;
|
||||
auto maxTileY = (bbox.bottom + tileSize - 1) >> logTileSize;
|
||||
|
||||
// Starting tile coordinates
|
||||
auto X = minTileX << logTileSize;
|
||||
auto Y = minTileY << logTileSize;
|
||||
|
||||
// Add tile corner edge offsets
|
||||
fixed16_t extents[3];
|
||||
extents[0] = calcEdgeExtents(edges[0], logTileSize);
|
||||
extents[1] = calcEdgeExtents(edges[1], logTileSize);
|
||||
extents[2] = calcEdgeExtents(edges[2], logTileSize);
|
||||
|
||||
// Evaluate edge equation for the starting tile
|
||||
auto E0 = evalEdgeFunction(edges[0], X, Y);
|
||||
auto E1 = evalEdgeFunction(edges[1], X, Y);
|
||||
auto E2 = evalEdgeFunction(edges[2], X, Y);
|
||||
|
||||
// traverse covered tiles
|
||||
for (uint32_t ty = minTileY; ty < maxTileY; ++ty) {
|
||||
auto e0 = E0;
|
||||
auto e1 = E1;
|
||||
auto e2 = E2;
|
||||
for (uint32_t tx = minTileX; tx < maxTileX; ++tx) {
|
||||
// check if tile overlap triangle
|
||||
if ((e0 + extents[0]) >= fxZero
|
||||
&& (e1 + extents[1]) >= fxZero
|
||||
&& (e2 + extents[2]) >= fxZero) {
|
||||
// assign primitive to tile
|
||||
uint32_t tile_id = (ty << 16) | tx;
|
||||
tiles[tile_id].push_back(p);
|
||||
++num_prims;
|
||||
}
|
||||
|
||||
// update edge equation x components
|
||||
e0 += edges[0].x << logTileSize;
|
||||
e1 += edges[1].x << logTileSize;
|
||||
e2 += edges[2].x << logTileSize;
|
||||
}
|
||||
// update edge equation y components
|
||||
E0 += edges[0].y << logTileSize;
|
||||
E1 += edges[1].y << logTileSize;
|
||||
E2 += edges[2].y << logTileSize;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
primbuf.reserve(model.primitives.size() * sizeof(rast_prim_t));
|
||||
auto prim_data = primbuf.data();
|
||||
for (auto& primitive : model.primitives) {
|
||||
// get primitive vertices
|
||||
auto& p0 = *(vec4d_f_t*)&model.vertives.at(primitive.i0);
|
||||
auto& p1 = *(vec4d_f_t*)&model.vertives.at(primitive.i1);
|
||||
auto& p2 = *(vec4d_f_t*)&model.vertives.at(primitive.i2);
|
||||
|
||||
rast_prim_t prim{
|
||||
rast_vtx_t{p0.x, p0.y, p0.z, p0.w},
|
||||
rast_vtx_t{p1.x, p1.y, p1.z, p1.w},
|
||||
rast_vtx_t{p2.x, p2.y, p2.z, p2.w},
|
||||
};
|
||||
|
||||
*(rast_prim_t*)(prim_data) = prim;
|
||||
prim_data += sizeof(rast_prim_t);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
tilebuf.reserve(tiles.size() * sizeof(rast_tile_header_t) + num_prims * sizeof(uint32_t));
|
||||
auto tile_data = tilebuf.data();
|
||||
for (auto it : tiles) {
|
||||
rast_tile_header_t header{it.first, (uint32_t)it.second.size()};
|
||||
*(rast_tile_header_t*)(tile_data) = header;
|
||||
tile_data += sizeof(rast_tile_header_t);
|
||||
memcpy(tile_data, it.second.data(), it.second.size() * sizeof(uint32_t));
|
||||
tile_data += it.second.size() * sizeof(uint32_t);
|
||||
}
|
||||
}
|
||||
|
||||
return tiles.size();
|
||||
}
|
||||
|
||||
// Return the extension of a file name or path: the text after the last '.',
// without the dot. Returns an empty string when there is no '.', or when the
// last '.' lies before the last path separator ('/' or '\\'), i.e. it belongs
// to a directory name rather than to the file name.
std::string getFileExt(const std::string& str) {
  auto dot = str.rfind('.');
  if (dot == std::string::npos)
    return "";

  // "dir.v1/file" has no extension even though the path contains a dot
  auto sep = str.find_last_of("/\\");
  if (sep != std::string::npos && sep > dot)
    return "";

  return str.substr(dot + 1);
}
|
||||
|
||||
// Case-insensitive string equality: true when both strings have the same
// length and every pair of characters matches after tolower().
bool iequals(const std::string& a, const std::string& b) {
  auto sz = a.size();
  if (b.size() != sz)
    return false;
  for (size_t i = 0; i < sz; ++i) {
    // tolower() requires a value representable as unsigned char (or EOF);
    // converting first keeps negative char values well-defined
    auto ca = tolower(static_cast<unsigned char>(a[i]));
    auto cb = tolower(static_cast<unsigned char>(b[i]));
    if (ca != cb)
      return false;
  }
  return true;
}
|
||||
|
||||
int LoadImage(const char *filename,
|
||||
|
|
|
@ -1,8 +1,16 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <bitmanip.h>
|
||||
#include <cocogfx/include/format.h>
|
||||
#include <cocogfx/include/blitter.h>
|
||||
#include <cocogfx/include/format.hpp>
|
||||
#include <cocogfx/include/blitter.hpp>
|
||||
#include "model.h"
|
||||
|
||||
uint32_t Binning(std::vector<uint8_t>& tilebuf,
|
||||
std::vector<uint8_t>& primbuf,
|
||||
const model_t& model,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t tileSize);
|
||||
|
||||
int LoadImage(const char *filename,
|
||||
cocogfx::ePixelFormat format,
|
||||
|
|
|
@ -105,12 +105,10 @@ void cleanup() {
|
|||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp) {
|
||||
(void)bpp;
|
||||
int render(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// start device
|
||||
|
@ -137,7 +135,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
|
||||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, bpp);
|
||||
//dump_image(dst_pixels, width, height, 4);
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
|
||||
|
||||
return 0;
|
||||
|
@ -227,10 +225,9 @@ int main(int argc, char *argv[]) {
|
|||
kernel_arg.wrapu = wrap;
|
||||
kernel_arg.wrapv = wrap;
|
||||
|
||||
kernel_arg.src_addr = src_addr;
|
||||
kernel_arg.src_logwidth = src_logwidth;
|
||||
kernel_arg.src_logheight = src_logheight;
|
||||
kernel_arg.src_addr = src_addr;
|
||||
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
kernel_arg.mip_offs[i] = mip_offsets.at(i);
|
||||
|
@ -269,8 +266,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// configure texture units
|
||||
vx_csr_write(device, CSR_TEX_STAGE, 0);
|
||||
vx_csr_write(device, CSR_TEX_WIDTH, src_logwidth);
|
||||
vx_csr_write(device, CSR_TEX_HEIGHT, src_logheight);
|
||||
vx_csr_write(device, CSR_TEX_LOGWIDTH, src_logwidth);
|
||||
vx_csr_write(device, CSR_TEX_LOGHEIGHT, src_logheight);
|
||||
vx_csr_write(device, CSR_TEX_FORMAT, format);
|
||||
vx_csr_write(device, CSR_TEX_WRAPU, wrap);
|
||||
vx_csr_write(device, CSR_TEX_WRAPV, wrap);
|
||||
|
@ -281,9 +278,9 @@ int main(int argc, char *argv[]) {
|
|||
vx_csr_write(device, CSR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
};
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));
|
||||
// render
|
||||
std::cout << "render" << std::endl;
|
||||
RT_CHECK(render(kernel_arg, dst_bufsize, dst_width, dst_height));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <cocogfx/include/tga.h>
|
||||
#include <cocogfx/include/png.h>
|
||||
#include <cocogfx/include/tga.hpp>
|
||||
#include <cocogfx/include/png.hpp>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <bitmanip.h>
|
||||
#include <cocogfx/include/format.h>
|
||||
#include <cocogfx/include/blitter.h>
|
||||
#include <cocogfx/include/format.hpp>
|
||||
#include <cocogfx/include/blitter.hpp>
|
||||
|
||||
int LoadImage(const char *filename,
|
||||
cocogfx::ePixelFormat format,
|
||||
|
|
2
third_party/cocogfx
vendored
2
third_party/cocogfx
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 1c6111c96cdb23c1286495be903501aff007cd75
|
||||
Subproject commit ff85ba2bd69176a19e92390cca1ab9888fbbbb3e
|
Loading…
Add table
Add a link
Reference in a new issue