draw3d test updates

This commit is contained in:
Blaise Tine 2022-02-27 22:44:01 -05:00
parent 86d43efcac
commit 0deacb46c5
22 changed files with 536 additions and 200 deletions

View file

@ -381,8 +381,8 @@
`define TEX_FXD_FRAC (`TEX_DIM_BITS+`TEX_SUBPIXEL_BITS)
`define TEX_STATE_ADDR 0
`define TEX_STATE_WIDTH 1
`define TEX_STATE_HEIGHT 2
`define TEX_STATE_LOGWIDTH 1
`define TEX_STATE_LOGHEIGHT 2
`define TEX_STATE_FORMAT 3
`define TEX_STATE_FILTER 4
`define TEX_STATE_WRAPU 5
@ -393,8 +393,8 @@
`define CSR_TEX_STATE_BEGIN 12'h7C0
`define CSR_TEX_STAGE `CSR_TEX_STATE_BEGIN
`define CSR_TEX_ADDR (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_ADDR)
`define CSR_TEX_WIDTH (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_WIDTH)
`define CSR_TEX_HEIGHT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_HEIGHT)
`define CSR_TEX_LOGWIDTH (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_LOGWIDTH)
`define CSR_TEX_LOGHEIGHT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_LOGHEIGHT)
`define CSR_TEX_FORMAT (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_FORMAT)
`define CSR_TEX_FILTER (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_FILTER)
`define CSR_TEX_WRAPU (`CSR_TEX_STATE_BEGIN+1+`TEX_STATE_WRAPU)
@ -406,21 +406,21 @@
// Raster Units ///////////////////////////////////////////////////////////////
`define RASTER_STATE_PIDX_ADDR 0
`define RASTER_STATE_PIDX_SIZE 1
`define RASTER_DIM_BITS 15
`define RASTER_STATE_TBUF_ADDR 0
`define RASTER_STATE_TILE_COUNT 1
`define RASTER_STATE_PBUF_ADDR 2
`define RASTER_STATE_PBUF_STRIDE 3
`define RASTER_STATE_TILE_XY 4
`define RASTER_STATE_TILE_WH 5
`define RASTER_STATE_COUNT 6
`define RASTER_STATE_TILE_LOGSIZE 4
`define RASTER_STATE_COUNT 5
`define CSR_RASTER_STATE_BEGIN `CSR_TEX_STATE_END
`define CSR_RASTER_PIDX_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PIDX_ADDR)
`define CSR_RASTER_PIDX_SIZE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PIDX_SIZE)
`define CSR_RASTER_TBUF_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TBUF_ADDR)
`define CSR_RASTER_TILE_COUNT (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_COUNT)
`define CSR_RASTER_PBUF_ADDR (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PBUF_ADDR)
`define CSR_RASTER_PBUF_STRIDE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_PBUF_STRIDE)
`define CSR_RASTER_TILE_XY (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_XY)
`define CSR_RASTER_TILE_WH (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_WH)
`define CSR_RASTER_TILE_LOGSIZE (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_TILE_LOGSIZE)
`define CSR_RASTER_STATE_END (`CSR_RASTER_STATE_BEGIN+`RASTER_STATE_COUNT)
`define CSR_RASTER_STATE(addr) ((addr) - `CSR_RASTER_STATE_BEGIN)

View file

@ -22,11 +22,11 @@ module VX_raster_csr (
csrs <= 0;
end else if (csr_wr_valid) begin
case (csr_wr_addr)
`CSR_RASTER_PIDX_ADDR: begin
csrs.pidx_addr <= csr_wr_data[31:0];
`CSR_RASTER_TBUF_ADDR: begin
csrs.tbuf_addr <= csr_wr_data[31:0];
end
`CSR_RASTER_PIDX_SIZE: begin
csrs.pidx_size <= csr_wr_data[31:0];
`CSR_RASTER_TILE_COUNT: begin
csrs.tile_count <= csr_wr_data[31:0];
end
`CSR_RASTER_PBUF_ADDR: begin
csrs.pbuf_addr <= csr_wr_data[31:0];
@ -34,13 +34,8 @@ module VX_raster_csr (
`CSR_RASTER_PBUF_STRIDE: begin
csrs.pbuf_stride <= csr_wr_data[31:0];
end
`CSR_RASTER_TILE_XY: begin
csrs.tile_left <= csr_wr_data[15:0];
csrs.tile_top <= csr_wr_data[31:16];
end
`CSR_RASTER_TILE_WH: begin
csrs.tile_width <= csr_wr_data[15:0];
csrs.tile_height <= csr_wr_data[31:16];
`CSR_RASTER_TILE_LOGSIZE: begin
csrs.tile_logsize <= csr_wr_data[15:0];
end
endcase
end

View file

@ -9,12 +9,11 @@ task trace_raster_state (
input [`CSR_ADDR_BITS-1:0] state
);
case (state)
`CSR_RASTER_PIDX_ADDR: dpi_trace("PIDX_ADDR");
`CSR_RASTER_PIDX_SIZE: dpi_trace("PIDX_SIZE");
`CSR_RASTER_TBUF_ADDR: dpi_trace("TBUF_ADDR");
`CSR_RASTER_TILE_COUNT: dpi_trace("TILE_COUNT");
`CSR_RASTER_PBUF_ADDR: dpi_trace("PBUF_ADDR");
`CSR_RASTER_PBUF_STRIDE: dpi_trace("PBUF_STRIDE");
`CSR_RASTER_TILE_XY: dpi_trace("TILE_XY");
`CSR_RASTER_TILE_WH: dpi_trace("TILE_WH");
`CSR_RASTER_TILE_LOGSIZE:dpi_trace("TILE_LOGSIZE");
default: dpi_trace("??");
endcase
endtask

View file

@ -3,17 +3,16 @@
`include "VX_define.vh"
`define RASTER_DIM_BITS 15
package raster_types;
typedef struct packed {
logic [31:0] pidx_addr;
logic [31:0] pidx_size;
logic [31:0] tbuf_addr;
logic [31:0] tile_count;
logic [31:0] pbuf_addr;
logic [31:0] pbuf_stride;
logic [15:0] tile_left;
logic [15:0] tile_top;
logic [15:0] tile_width;
logic [15:0] tile_height;
logic [15:0] tile_logsize;
} raster_csrs_t;
endpackage

View file

@ -60,10 +60,10 @@ module VX_tex_csr #(
`CSR_TEX_FILTER: begin
tex_filter[csr_tex_stage] <= csr_wr_data[`TEX_FILTER_BITS-1:0];
end
`CSR_TEX_WIDTH: begin
`CSR_TEX_LOGWIDTH: begin
tex_logdims[csr_tex_stage][0] <= csr_wr_data[`TEX_LOD_BITS-1:0];
end
`CSR_TEX_HEIGHT: begin
`CSR_TEX_LOGHEIGHT: begin
tex_logdims[csr_tex_stage][1] <= csr_wr_data[`TEX_LOD_BITS-1:0];
end
default: begin

View file

@ -36,15 +36,15 @@ task trace_tex_state (
input [`CSR_ADDR_BITS-1:0] state
);
case (state)
`CSR_TEX_ADDR: dpi_trace("ADDR");
`CSR_TEX_WIDTH: dpi_trace("WIDTH");
`CSR_TEX_HEIGHT: dpi_trace("HEIGHT");
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
`CSR_TEX_FILTER: dpi_trace("FILTER");
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
`CSR_TEX_ADDR: dpi_trace("ADDR");
`CSR_TEX_LOGWIDTH: dpi_trace("LOGWIDTH");
`CSR_TEX_LOGHEIGHT: dpi_trace("LOGHEIGHT");
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
`CSR_TEX_FILTER: dpi_trace("FILTER");
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
//`CSR_TEX_MIPOFF:
default: dpi_trace("MIPOFF");
default: dpi_trace("MIPOFF");
endcase
endtask

View file

@ -1,7 +1,7 @@
#pragma once
#include <cstdint>
#include <cocogfx/include/fixed.h>
#include <cocogfx/include/fixed.hpp>
#include <bitmanip.h>
using namespace cocogfx;
@ -23,11 +23,11 @@ enum class TexFormat {
};
template <uint32_t F, typename T = int32_t>
T Clamp(Fixed<F,T> fx, WrapMode mode) {
T Clamp(TFixed<F,T> fx, WrapMode mode) {
switch (mode) {
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > Fixed<F,T>::MASK) ? Fixed<F,T>::MASK : fx.data());
case WrapMode::Repeat: return (fx.data() & Fixed<F,T>::MASK);
case WrapMode::Mirror: return (bit_get(fx.data(), Fixed<F,T>::FRAC) ? ~fx.data() : fx.data());
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > TFixed<F,T>::MASK) ? TFixed<F,T>::MASK : fx.data());
case WrapMode::Repeat: return (fx.data() & TFixed<F,T>::MASK);
case WrapMode::Mirror: return (bit_get(fx.data(), TFixed<F,T>::FRAC) ? ~fx.data() : fx.data());
default:
std::abort();
return 0;
@ -121,8 +121,8 @@ inline uint32_t Lerp8888(uint32_t a, uint32_t b, uint32_t f) {
}
template <uint32_t F, typename T = int32_t>
void TexAddressLinear(Fixed<F,T> fu,
Fixed<F,T> fv,
void TexAddressLinear(TFixed<F,T> fu,
TFixed<F,T> fv,
uint32_t log_width,
uint32_t log_height,
WrapMode wrapu,
@ -134,16 +134,16 @@ void TexAddressLinear(Fixed<F,T> fu,
uint32_t* alpha,
uint32_t* beta
) {
auto delta_x = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_width);
auto delta_y = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_height);
auto delta_x = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_width);
auto delta_y = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_height);
uint32_t u0 = Clamp(fu - delta_x, wrapu);
uint32_t u1 = Clamp(fu + delta_x, wrapu);
uint32_t v0 = Clamp(fv - delta_y, wrapv);
uint32_t v1 = Clamp(fv + delta_y, wrapv);
uint32_t shift_u = (Fixed<F,T>::FRAC - log_width);
uint32_t shift_v = (Fixed<F,T>::FRAC - log_height);
uint32_t shift_u = (TFixed<F,T>::FRAC - log_width);
uint32_t shift_v = (TFixed<F,T>::FRAC - log_height);
uint32_t x0s = (u0 << 8) >> shift_u;
uint32_t y0s = (v0 << 8) >> shift_v;
@ -165,8 +165,8 @@ void TexAddressLinear(Fixed<F,T> fu,
}
template <uint32_t F, typename T = int32_t>
void TexAddressPoint(Fixed<F,T> fu,
Fixed<F,T> fv,
void TexAddressPoint(TFixed<F,T> fu,
TFixed<F,T> fv,
uint32_t log_width,
uint32_t log_height,
WrapMode wrapu,
@ -176,8 +176,8 @@ void TexAddressPoint(Fixed<F,T> fu,
uint32_t u = Clamp(fu, wrapu);
uint32_t v = Clamp(fv, wrapv);
uint32_t x = u >> (Fixed<F,T>::FRAC - log_width);
uint32_t y = v >> (Fixed<F,T>::FRAC - log_height);
uint32_t x = u >> (TFixed<F,T>::FRAC - log_width);
uint32_t y = v >> (TFixed<F,T>::FRAC - log_height);
*addr = x + (y << log_width);

View file

@ -3,7 +3,7 @@ RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi
THIRD_PARTY_DIR = ../../third_party
CXXFLAGS += -std=C++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../../hw -I../../common
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include

View file

@ -46,11 +46,11 @@ public:
uint32_t read(uint32_t stage, int32_t u, int32_t v, int32_t lod, TraceData* trace_data) {
auto& states = core_->global_csrs_.tex_csrs.at(stage);
auto xu = Fixed<TEX_FXD_FRAC>::make(u);
auto xv = Fixed<TEX_FXD_FRAC>::make(v);
auto xu = TFixed<TEX_FXD_FRAC>::make(u);
auto xv = TFixed<TEX_FXD_FRAC>::make(v);
auto base_addr = states.at(TEX_STATE_ADDR) + states.at(TEX_STATE_MIPOFF(lod));
auto log_width = std::max<int32_t>(states.at(TEX_STATE_WIDTH) - lod, 0);
auto log_height = std::max<int32_t>(states.at(TEX_STATE_HEIGHT) - lod, 0);
auto log_width = std::max<int32_t>(states.at(TEX_STATE_LOGWIDTH) - lod, 0);
auto log_height = std::max<int32_t>(states.at(TEX_STATE_LOGHEIGHT) - lod, 0);
auto format = (TexFormat)states.at(TEX_STATE_FORMAT);
auto filter = (FilterMode)states.at(TEX_STATE_FILTER);
auto wrapu = (WrapMode)states.at(TEX_STATE_WRAPU);

View file

@ -11,14 +11,14 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -std=c++14 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
VX_SRCS = kernel.c
VX_SRCS = kernel.cpp
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party

View file

@ -7,33 +7,35 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
typedef struct {
float x;
float y;
float z;
float x;
float y;
float z;
float w;
uint32_t c;
float u;
float v;
} vtx_t;
float u;
float v;
} vertex_t;
typedef struct {
float x;
float y;
float z;
float w;
} rast_vtx_t;
typedef struct {
vtx_t v0;
vtx_t v1;
vtx_t v2;
} prim_t;
rast_vtx_t v0;
rast_vtx_t v1;
rast_vtx_t v2;
} rast_prim_t;
typedef struct {
uint32_t top;
uint32_t left;
uint32_t width;
uint32_t height;
uint32_t num_prims;
uint32_t* indices;
} tile_t;
uint32_t tileXY;
uint32_t num_prims;
} rast_tile_header_t;
typedef struct {
uint32_t num_tiles;
uint32_t tiles_addr;
uint32_t prims_addr;
uint32_t vts_addr;
uint32_t dst_addr;
uint32_t dst_width;
uint32_t dst_height;

View file

@ -14,17 +14,6 @@ void kernel_body(int task_id, tile_arg_t* arg) {
int main() {
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
// configure raster unit
// configure rop unit
tile_arg_t targ;
targ.state = arg;
vx_spawn_tasks(arg->num_tiles, (vx_spawn_tasks_cb)kernel_body, &targ);
/*for (uint32_t t=0; t < arg->num_tiles; ++t) {
kernel_body(t, &targ);
}*/
// TODO
return 0;
}

View file

@ -4,10 +4,12 @@
#include <string.h>
#include <chrono>
#include <cmath>
#include <array>
#include <assert.h>
#include <vortex.h>
#include "common.h"
#include "utils.h"
#include "model_quad.h"
using namespace cocogfx;
@ -24,35 +26,48 @@ using namespace cocogfx;
///////////////////////////////////////////////////////////////////////////////
const char* kernel_file = "kernel.bin";
const char* input_file = "soccer.png";
const char* output_file = "output.png";
uint32_t dst_width = 64;
ePixelFormat src_format = FORMAT_A8R8G8B8;
int src_wrap = 0;
int src_filter = 0; // 0-> point, 1->bilinear
uint32_t dst_width = 64;
uint32_t dst_height = 64;
uint32_t tile_size = 64;
const model_t& model = model_quad;
vx_device_h device = nullptr;
vx_buffer_h staging_buf = nullptr;
uint64_t tilebuf_addr;
uint64_t primbuf_addr;
uint64_t srcbuf_addr;
uint64_t dstbuf_addr;
kernel_arg_t kernel_arg;
static void show_usage() {
std::cout << "Vortex 3D Rendering Test." << std::endl;
std::cout << "Usage: [-k: kernel] [-o image] [-u width] [-v height] [-h: help]" << std::endl;
std::cout << "Usage: [-i texture] [-o output] [-w width] [-h height] [-t tilesize]" << std::endl;
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "k:o:u:v:h?")) != -1) {
// The optstring must list every option the switch below handles; without
// "i:" and "t:" getopt() reports -i/-t as unknown and they never reach
// their case labels.
while ((c = getopt(argc, argv, "i:k:o:w:h:t:?")) != -1) {
switch (c) {
case 'i':
input_file = optarg;
break;
case 'o':
output_file = optarg;
break;
case 'u':
case 'w':
dst_width = std::atoi(optarg);
break;
case 'v':
case 'h':
dst_height = std::atoi(optarg);
break;
case 'k':
kernel_file = optarg;
case 't':
tile_size = std::atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
@ -69,19 +84,18 @@ void cleanup() {
vx_buf_free(staging_buf);
}
if (device) {
vx_mem_free(device, kernel_arg.tiles_addr);
vx_mem_free(device, kernel_arg.prims_addr);
vx_mem_free(device, kernel_arg.dst_addr);
vx_mem_free(device, tilebuf_addr);
vx_mem_free(device, primbuf_addr);
vx_mem_free(device, srcbuf_addr);
vx_mem_free(device, dstbuf_addr);
vx_dev_close(device);
}
}
int run_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t width,
uint32_t height,
uint32_t bpp) {
(void)bpp;
int render(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t width,
uint32_t height) {
auto time_start = std::chrono::high_resolution_clock::now();
// start device
@ -107,31 +121,49 @@ int run_test(const kernel_arg_t& kernel_arg,
}
// save output image
std::cout << "save output image" << std::endl;
//dump_image(dst_pixels, width, height, bpp);
std::cout << "save output image" << std::endl;
//dump_image(dst_pixels, width, height, 4);
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
return 0;
}
void allocate_tiles() {
// TODO
}
int main(int argc, char *argv[]) {
std::vector<uint8_t> tilebuf;
std::vector<uint8_t> primbuf;
int main(int argc, char *argv[]) {
std::vector<tile_t> tiles;
std::vector<prim_t> primitives;
std::vector<uint8_t> srcbuf;
std::vector<uint32_t> mip_offsets;
uint32_t src_width;
uint32_t src_height;
// parse command arguments
parse_args(argc, argv);
uint32_t dst_bpp = 4;
uint32_t dst_bufsize = dst_bpp * dst_width * dst_height;
if (!ispow2(tile_size)) {
std::cout << "Error: only power of two tile_size supported: tile_size=" << tile_size << std::endl;
return -1;
}
allocate_tiles();
uint32_t tile_bufsize = tiles.size() * sizeof(tile_t);
uint32_t prim_bufsize = primitives.size() * sizeof(prim_t);
if (!ispow2(dst_width)) {
std::cout << "Error: only power of two dst_width supported: dst_width=" << dst_width << std::endl;
return -1;
}
if (!ispow2(dst_height)) {
std::cout << "Error: only power of two dst_height supported: dst_height=" << dst_height << std::endl;
return -1;
}
// The render target must be covered by a whole number of tiles horizontally.
if (0 != (dst_width % tile_size)) {
  std::cout << "Error: dst_width must be divisible by tile_size" << std::endl;
  return -1;
}
if (0 != (dst_height % tile_size)) {
std::cout << "Error: dst_height must be divisible by tile_size" << std::endl;
return -1;
}
// open device connection
std::cout << "open device connection" << std::endl;
@ -144,36 +176,50 @@ int main(int argc, char *argv[]) {
return -1;
}
uint64_t max_cores, max_warps, max_threads;
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
{
std::vector<uint8_t> staging;
RT_CHECK(LoadImage(input_file, src_format, staging, &src_width, &src_height));
// check power of two support
// Texture dimensions are programmed as log2 values, so both must be powers of two.
if (!ispow2(src_width) || !ispow2(src_height)) {
  std::cout << "Error: only power of two textures supported: width=" << src_width << ", height=" << src_height << std::endl;
  return -1;
}
uint32_t num_tasks = max_cores * max_warps * max_threads;
RT_CHECK(GenerateMipmaps(srcbuf, mip_offsets, staging, src_format, src_width, src_height, src_width * 4));
}
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
std::cout << "destination staging_buf: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
uint32_t src_logwidth = log2ceil(src_width);
uint32_t src_logheight = log2ceil(src_height);
uint32_t dstbuf_size = dst_width * dst_height * 4;
uint32_t logTileSize = log2ceil(tile_size);
// Perform tile binning
auto num_tiles = Binning(tilebuf, primbuf, model, dst_width, dst_height, tile_size);
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
uint64_t tile_addr, prim_addr, dst_addr;
RT_CHECK(vx_mem_alloc(device, tile_bufsize, &tile_addr));
RT_CHECK(vx_mem_alloc(device, prim_bufsize, &prim_addr));
RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr));
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_mem_alloc(device, tilebuf.size(), &tilebuf_addr));
RT_CHECK(vx_mem_alloc(device, primbuf.size(), &primbuf_addr));
RT_CHECK(vx_mem_alloc(device, srcbuf.size(), &srcbuf_addr));
RT_CHECK(vx_mem_alloc(device, dstbuf_size, &dstbuf_addr));
std::cout << "tile_addr=0x" << std::hex << tile_addr << std::endl;
std::cout << "prim_addr=0x" << std::hex << prim_addr << std::endl;
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::endl;
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::endl;
std::cout << "srcbuf_addr=0x" << std::hex << srcbuf_addr << std::endl;
std::cout << "dstbuf_addr=0x" << std::hex << dstbuf_addr << std::endl;
// allocate staging shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t),
std::max<uint32_t>(tile_bufsize,
std::max<uint32_t>(prim_bufsize, dst_bufsize)));
std::max<uint32_t>(tilebuf.size(),
std::max<uint32_t>(primbuf.size(), dstbuf_size)));
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
// upload kernel argument
@ -181,9 +227,9 @@ int main(int argc, char *argv[]) {
{
kernel_arg.dst_width = dst_width;
kernel_arg.dst_height = dst_height;
kernel_arg.dst_stride = dst_bpp;
kernel_arg.dst_pitch = dst_bpp * dst_width;
kernel_arg.dst_addr = dst_addr;
kernel_arg.dst_stride = 4;
kernel_arg.dst_pitch = 4 * dst_width;
kernel_arg.dst_addr = dstbuf_addr;
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
@ -193,36 +239,55 @@ int main(int argc, char *argv[]) {
// upload tiles buffer
std::cout << "upload tiles buffer" << std::endl;
{
auto buf_ptr = (tile_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < tiles.size(); ++i) {
buf_ptr[i] = tiles.at(i);
}
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.tiles_addr, tile_bufsize, 0));
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
memcpy(buf_ptr, tilebuf.data(), tilebuf.size());
RT_CHECK(vx_copy_to_dev(staging_buf, tilebuf_addr, tilebuf.size(), 0));
}
// upload primitives buffer
std::cout << "upload primitives buffer" << std::endl;
{
auto buf_ptr = (prim_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < primitives.size(); ++i) {
buf_ptr[i] = primitives.at(i);
}
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.prims_addr, prim_bufsize, 0));
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
memcpy(buf_ptr, primbuf.data(), primbuf.size());
RT_CHECK(vx_copy_to_dev(staging_buf, primbuf_addr, primbuf.size(), 0));
}
// clear destination buffer
std::cout << "clear destination buffer" << std::endl;
{
auto buf_ptr = (uint32_t*)vx_host_ptr(staging_buf);
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
for (uint32_t i = 0; i < (dstbuf_size/4); ++i) {
buf_ptr[i] = 0xdeadbeef;
}
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dst_bufsize, 0));
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, dstbuf_size, 0));
}
// configure texture units
vx_csr_write(device, CSR_TEX_STAGE, 0);
vx_csr_write(device, CSR_TEX_LOGWIDTH, src_logwidth);
vx_csr_write(device, CSR_TEX_LOGHEIGHT, src_logheight);
vx_csr_write(device, CSR_TEX_FORMAT, src_format);
vx_csr_write(device, CSR_TEX_WRAPU, src_wrap);
vx_csr_write(device, CSR_TEX_WRAPV, src_wrap);
vx_csr_write(device, CSR_TEX_FILTER, src_filter);
vx_csr_write(device, CSR_TEX_ADDR, srcbuf_addr);
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
assert(i < TEX_LOD_MAX);
vx_csr_write(device, CSR_TEX_MIPOFF(i), mip_offsets.at(i));
};
// configure raster units
vx_csr_write(device, CSR_RASTER_TBUF_ADDR, tilebuf_addr);
vx_csr_write(device, CSR_RASTER_TILE_COUNT, num_tiles);
vx_csr_write(device, CSR_RASTER_PBUF_ADDR, primbuf_addr);
vx_csr_write(device, CSR_RASTER_PBUF_STRIDE, sizeof(rast_prim_t));
vx_csr_write(device, CSR_RASTER_TILE_LOGSIZE, logTileSize);
// configure rop units
// run tests
std::cout << "run tests" << std::endl;
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));
std::cout << "render" << std::endl;
RT_CHECK(render(kernel_arg, dstbuf_size, dst_width, dst_height));
// cleanup
std::cout << "cleanup" << std::endl;

View file

@ -0,0 +1,15 @@
#pragma once
#include "common.h"
#include <vector>
// One triangle of a model, stored as three indices into the model's
// vertex array.
struct primitive_t {
  uint32_t i0;  // index of the first vertex
  uint32_t i1;  // index of the second vertex
  uint32_t i2;  // index of the third vertex
};
// Indexed triangle mesh: a vertex array plus a list of triangles that
// reference it by index.
struct model_t {
// Vertex array. NOTE(review): the name looks like a typo for "vertices";
// it is kept because other code accesses the member by this name.
std::vector<vertex_t> vertives;
// Triangles; each primitive_t holds three indices into `vertives`.
std::vector<primitive_t> primitives;
};

View file

@ -0,0 +1,55 @@
#pragma once
#include "model.h"
// Built-in test model: 34 vertex records and 12 triangles.
// Each vertex row initializes a vertex_t positionally, i.e.
// {x, y, z, w, c, u, v}.
// NOTE(review): the triangles below only reference vertex indices 0..7;
// the remaining rows look unused (they repeat earlier rows) - confirm
// before trimming.
const model_t model_quad = {
// vertices
{
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000},
{-6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 1.000000, 0.000000},
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
{6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{-6.337301, 11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{-6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 1.000000, 1.000000},
{6.337301, -11.949731, 18.974358, 20.000000, 0xffffffff, 0.000000, 1.000000},
{6.337301, 0.000000, 13.770777, 15.050253, 0xffffffff, 0.000000, 0.000000}
// primitives: {i0, i1, i2} vertex indices of each triangle
}, {
{2, 1, 3},
{3, 1, 0},
{1, 2, 0},
{0, 2, 3},
{4, 0, 5},
{5, 0, 1},
{1, 5, 0},
{0, 5, 4},
{3, 0, 7},
{7, 0, 4},
{5, 4, 1},
{1, 4, 0}
}
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

View file

@ -3,28 +3,240 @@
#include <string>
#include <iostream>
#include <iomanip>
#include <cocogfx/include/tga.h>
#include <cocogfx/include/png.h>
#include <string.h>
#include <unordered_map>
#include <cocogfx/include/tga.hpp>
#include <cocogfx/include/png.hpp>
#include <cocogfx/include/fixed.hpp>
#include <cocogfx/include/math.hpp>
using namespace cocogfx;
using fixed16_t = TFixed<16>;
using vec2d_f_t = TVector2<float>;
using vec2d_fx_t = TVector2<fixed16_t>;
using vec3d_fx_t = TVector3<fixed16_t>;
using vec4d_f_t = TVector4<float>;
using rect_f_t = TRect<float>;
using rect_u_t = TRect<uint32_t>;
// File-local fixed-point constants (fixed16_t is TFixed<16>).
// fxZero is the comparison/neutral value used by the edge-function helpers
// and the tile overlap test in this file.
static fixed16_t fxZero(0);
// NOTE(review): fxHalf is not referenced by the binning code visible in
// this part of the file - confirm it is still needed.
static fixed16_t fxHalf(0.5f);
// Evaluate the edge function E(x, y) = e.x * x + e.y * y + e.z at the
// given integer position.
static fixed16_t evalEdgeFunction(const vec3d_fx_t& e, uint32_t x, uint32_t y) {
  auto xTerm = e.x * x;
  auto yTerm = e.y * y;
  return xTerm + yTerm + e.z;
}
// Calculate the edge extent used for the tile overlap test.
// Of the four tile corners (0,0), (1,0), (0,1), (1,1), this selects the one
// where e.x * dx + e.y * dy is largest - i.e. it keeps each coefficient only
// when it is non-negative - and scales the sum by the tile size
// (1 << logTileSize).
static fixed16_t calcEdgeExtents(const vec3d_fx_t& e, uint32_t logTileSize) {
  fixed16_t xPart = fxZero;
  if (e.x >= fxZero) {
    xPart = e.x;
  }
  fixed16_t yPart = fxZero;
  if (e.y >= fxZero) {
    yPart = e.y;
  }
  return (xPart + yPart) << logTileSize;
}
// Build the three edge equations of a triangle given in 2D homogeneous
// device space.
//
// edges      - output; edges[i] = {a_i, b_i, c_i} in fixed point, each row
//              scaled so that |a_i| + |b_i| == 1.
// v0, v1, v2 - triangle vertices; only x, y and w are read.
//
// Returns the determinant of the vertex matrix [x y w]. The caller uses its
// sign to reject back-facing (< 0) and degenerate (== 0) triangles.
static float EdgeEquation(vec3d_fx_t edges[3],
                          const vec4d_f_t& v0,
                          const vec4d_f_t& v1,
                          const vec4d_f_t& v2) {
  // Calculate edge equation matrix (adjoint of the vertex matrix)
  auto a0 = (v1.y * v2.w) - (v2.y * v1.w);
  auto a1 = (v2.y * v0.w) - (v0.y * v2.w);
  auto a2 = (v0.y * v1.w) - (v1.y * v0.w);

  auto b0 = (v2.x * v1.w) - (v1.x * v2.w);
  auto b1 = (v0.x * v2.w) - (v2.x * v0.w);
  auto b2 = (v1.x * v0.w) - (v0.x * v1.w);

  auto c0 = (v1.x * v2.y) - (v2.x * v1.y);
  auto c1 = (v2.x * v0.y) - (v0.x * v2.y);
  auto c2 = (v0.x * v1.y) - (v1.x * v0.y);

  // The determinant is the cofactor expansion along the w column. It has to
  // be taken from the raw coefficients: each row below is scaled by its own
  // factor, so the same sum over the normalized c's is no longer
  // proportional to the determinant and can even change sign.
  auto det = c0 * v0.w + c1 * v1.w + c2 * v2.w;

  // Normalize each row. The arithmetic is done in double, as before.
  // NOTE(review): a row with a == b == 0 (two vertices projecting to the
  // same point) divides by zero here; such triangles have det == 0 and are
  // rejected by the caller, but the fixed-point conversion below still runs.
  auto normalize = [](auto& a, auto& b, auto& c) {
    auto t = 1.0 / (std::abs(a) + std::abs(b));
    a *= t;
    b *= t;
    c *= t;
  };
  normalize(a0, b0, c0);
  normalize(a1, b1, c1);
  normalize(a2, b2, c2);

  // Convert the edge equation to fixedpoint
  edges[0] = {fixed16_t(a0), fixed16_t(b0), fixed16_t(c0)};
  edges[1] = {fixed16_t(a1), fixed16_t(b1), fixed16_t(c1)};
  edges[2] = {fixed16_t(a2), fixed16_t(b2), fixed16_t(c2)};

  /*printf("E0.x=%f, E0.y=%f, E0.z=%f, E1.x=%f, E1.y=%f, E1.z=%f, E2.x=%f, E2.y=%f, E2.z=%f\n",
      float(edges[0].x), float(edges[0].y), float(edges[0].z),
      float(edges[1].x), float(edges[1].y), float(edges[1].z),
      float(edges[2].x), float(edges[2].y), float(edges[2].z));*/

  return det;
}
// Traverse the model's primitives and assign each one to the screen tiles
// it may cover, then serialize the result for the raster unit.
//
// tilebuf  - output; resized to hold, for every non-empty tile, a
//            rast_tile_header_t {tile id, primitive count} followed by that
//            many uint32_t primitive indices. The tile id is (ty << 16) | tx
//            in tile units. Tiles are written in unordered_map iteration
//            order.
// primbuf  - output; resized to hold one rast_prim_t per model primitive, in
//            model order, so the indices stored in tilebuf index this array.
// model    - indexed triangle mesh to bin.
// width, height - render target size in pixels.
// tileSize - tile edge in pixels; log2ceil(tileSize) is what is actually
//            used, so it should be a power of two.
//
// Returns the number of non-empty tiles written to tilebuf.
uint32_t Binning(std::vector<uint8_t>& tilebuf,
                 std::vector<uint8_t>& primbuf,
                 const model_t& model,
                 uint32_t width,
                 uint32_t height,
                 uint32_t tileSize) {

  const uint32_t logTileSize = log2ceil(tileSize);
  // Tile edge implied by the shift amount (equals tileSize for powers of two).
  const uint32_t tileDim = uint32_t(1) << logTileSize;

  // tile id -> indices of the primitives assigned to that tile
  std::unordered_map<uint32_t, std::vector<uint32_t>> tiles;

  // total number of (tile, primitive) assignments
  uint32_t num_prims = 0;

  for (uint32_t p = 0; p < model.primitives.size(); ++p) {
    // get primitive vertices
    // (the leading x, y, z, w floats of vertex_t are viewed as a vec4d_f_t)
    auto& primitive = model.primitives.at(p);
    auto& p0 = *(vec4d_f_t*)&model.vertives.at(primitive.i0);
    auto& p1 = *(vec4d_f_t*)&model.vertives.at(primitive.i1);
    auto& p2 = *(vec4d_f_t*)&model.vertives.at(primitive.i2);

    vec3d_fx_t edges[3];
    rect_u_t bbox;

    {
      // Convert position from clip to 2D homogenous device space
      vec4d_f_t v0, v1, v2;
      ClipTo2DH(&v0, p0, width, height);
      ClipTo2DH(&v1, p1, width, height);
      ClipTo2DH(&v2, p2, width, height);

      // Calculate edge equation
      auto det = EdgeEquation(edges, v0, v1, v2);
      if (det <= 0) {
        // reject back-facing or degenerate triangles
        continue;
      }
    }

    {
      // Convert position from clip to screen space
      vec4d_f_t v0, v1, v2;
      ClipToScreen(&v0, p0, width, height);
      ClipToScreen(&v1, p1, width, height);
      ClipToScreen(&v2, p2, width, height);

      // Calculate bounding box
      rect_f_t tmp;
      CalcBoundingBox(&tmp, *(vec2d_f_t*)&v0, *(vec2d_f_t*)&v1, *(vec2d_f_t*)&v2);

      // Clamp every side to [0, width] / [0, height]. Clamping only one end
      // lets a triangle that lies completely off-screen produce a negative
      // value that wraps to a huge unsigned coordinate.
      bbox.left   = std::min<int32_t>(width,  std::max<int32_t>(0, tmp.left));
      bbox.right  = std::max<int32_t>(0, std::min<int32_t>(width,  tmp.right));
      bbox.top    = std::min<int32_t>(height, std::max<int32_t>(0, tmp.top));
      bbox.bottom = std::max<int32_t>(0, std::min<int32_t>(height, tmp.bottom));
    }

    // Calculate min/max tile positions (max is exclusive)
    auto minTileX = bbox.left >> logTileSize;
    auto minTileY = bbox.top >> logTileSize;
    auto maxTileX = (bbox.right + tileDim - 1) >> logTileSize;
    auto maxTileY = (bbox.bottom + tileDim - 1) >> logTileSize;

    // Starting tile coordinates (in pixels)
    auto X = minTileX << logTileSize;
    auto Y = minTileY << logTileSize;

    // Add tile corner edge offsets
    fixed16_t extents[3];
    extents[0] = calcEdgeExtents(edges[0], logTileSize);
    extents[1] = calcEdgeExtents(edges[1], logTileSize);
    extents[2] = calcEdgeExtents(edges[2], logTileSize);

    // Evaluate edge equation for the starting tile
    auto E0 = evalEdgeFunction(edges[0], X, Y);
    auto E1 = evalEdgeFunction(edges[1], X, Y);
    auto E2 = evalEdgeFunction(edges[2], X, Y);

    // traverse covered tiles
    for (uint32_t ty = minTileY; ty < maxTileY; ++ty) {
      auto e0 = E0;
      auto e1 = E1;
      auto e2 = E2;
      for (uint32_t tx = minTileX; tx < maxTileX; ++tx) {
        // check if tile overlap triangle: every edge function must be
        // non-negative at its most favourable tile corner
        if ((e0 + extents[0]) >= fxZero
         && (e1 + extents[1]) >= fxZero
         && (e2 + extents[2]) >= fxZero) {
          // assign primitive to tile
          uint32_t tile_id = (ty << 16) | tx;
          tiles[tile_id].push_back(p);
          ++num_prims;
        }

        // update edge equation x components (step one tile to the right)
        e0 += edges[0].x << logTileSize;
        e1 += edges[1].x << logTileSize;
        e2 += edges[2].x << logTileSize;
      }

      // update edge equation y components (step one tile down)
      E0 += edges[0].y << logTileSize;
      E1 += edges[1].y << logTileSize;
      E2 += edges[2].y << logTileSize;
    }
  }

  {
    // Serialize the primitives. resize() (not reserve()) is required: the
    // bytes are written through data(), and the caller sizes the device
    // allocation and upload from primbuf.size().
    primbuf.resize(model.primitives.size() * sizeof(rast_prim_t));
    auto prim_data = primbuf.data();
    for (auto& primitive : model.primitives) {
      // get primitive vertices
      auto& v0 = model.vertives.at(primitive.i0);
      auto& v1 = model.vertives.at(primitive.i1);
      auto& v2 = model.vertives.at(primitive.i2);

      rast_prim_t prim{
        rast_vtx_t{v0.x, v0.y, v0.z, v0.w},
        rast_vtx_t{v1.x, v1.y, v1.z, v1.w},
        rast_vtx_t{v2.x, v2.y, v2.z, v2.w},
      };

      // memcpy avoids a typed store into the byte buffer
      memcpy(prim_data, &prim, sizeof(rast_prim_t));
      prim_data += sizeof(rast_prim_t);
    }
  }

  {
    // Serialize the tiles: header followed by the tile's primitive indices.
    tilebuf.resize(tiles.size() * sizeof(rast_tile_header_t) + num_prims * sizeof(uint32_t));
    auto tile_data = tilebuf.data();
    for (const auto& it : tiles) {
      rast_tile_header_t header{it.first, static_cast<uint32_t>(it.second.size())};
      memcpy(tile_data, &header, sizeof(rast_tile_header_t));
      tile_data += sizeof(rast_tile_header_t);

      memcpy(tile_data, it.second.data(), it.second.size() * sizeof(uint32_t));
      tile_data += it.second.size() * sizeof(uint32_t);
    }
  }

  return static_cast<uint32_t>(tiles.size());
}
std::string getFileExt(const std::string& str) {
auto i = str.rfind('.');
if (i != std::string::npos) {
return str.substr(i+1);
}
return("");
auto i = str.rfind('.');
if (i != std::string::npos) {
return str.substr(i+1);
}
return("");
}
bool iequals(const std::string& a, const std::string& b) {
auto sz = a.size();
if (b.size() != sz)
return false;
for (size_t i = 0; i < sz; ++i) {
if (tolower(a[i]) != tolower(b[i]))
return false;
}
return true;
auto sz = a.size();
if (b.size() != sz)
return false;
for (size_t i = 0; i < sz; ++i) {
if (tolower(a[i]) != tolower(b[i]))
return false;
}
return true;
}
int LoadImage(const char *filename,

View file

@ -1,8 +1,16 @@
#include <cstdint>
#include <vector>
#include <bitmanip.h>
#include <cocogfx/include/format.h>
#include <cocogfx/include/blitter.h>
#include <cocogfx/include/format.hpp>
#include <cocogfx/include/blitter.hpp>
#include "model.h"
uint32_t Binning(std::vector<uint8_t>& tilebuf,
std::vector<uint8_t>& primbuf,
const model_t& model,
uint32_t width,
uint32_t height,
uint32_t tileSize);
int LoadImage(const char *filename,
cocogfx::ePixelFormat format,

View file

@ -105,12 +105,10 @@ void cleanup() {
}
}
int run_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t width,
uint32_t height,
uint32_t bpp) {
(void)bpp;
int render(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t width,
uint32_t height) {
auto time_start = std::chrono::high_resolution_clock::now();
// start device
@ -137,7 +135,7 @@ int run_test(const kernel_arg_t& kernel_arg,
// save output image
std::cout << "save output image" << std::endl;
//dump_image(dst_pixels, width, height, bpp);
//dump_image(dst_pixels, width, height, 4);
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
return 0;
@ -227,10 +225,9 @@ int main(int argc, char *argv[]) {
kernel_arg.wrapu = wrap;
kernel_arg.wrapv = wrap;
kernel_arg.src_addr = src_addr;
kernel_arg.src_logwidth = src_logwidth;
kernel_arg.src_logheight = src_logheight;
kernel_arg.src_addr = src_addr;
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
assert(i < TEX_LOD_MAX);
kernel_arg.mip_offs[i] = mip_offsets.at(i);
@ -269,8 +266,8 @@ int main(int argc, char *argv[]) {
// configure texture units
vx_csr_write(device, CSR_TEX_STAGE, 0);
vx_csr_write(device, CSR_TEX_WIDTH, src_logwidth);
vx_csr_write(device, CSR_TEX_HEIGHT, src_logheight);
vx_csr_write(device, CSR_TEX_LOGWIDTH, src_logwidth);
vx_csr_write(device, CSR_TEX_LOGHEIGHT, src_logheight);
vx_csr_write(device, CSR_TEX_FORMAT, format);
vx_csr_write(device, CSR_TEX_WRAPU, wrap);
vx_csr_write(device, CSR_TEX_WRAPV, wrap);
@ -281,9 +278,9 @@ int main(int argc, char *argv[]) {
vx_csr_write(device, CSR_TEX_MIPOFF(i), mip_offsets.at(i));
};
// run tests
std::cout << "run tests" << std::endl;
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));
// render
std::cout << "render" << std::endl;
RT_CHECK(render(kernel_arg, dst_bufsize, dst_width, dst_height));
// cleanup
std::cout << "cleanup" << std::endl;

View file

@ -3,8 +3,8 @@
#include <string>
#include <iostream>
#include <iomanip>
#include <cocogfx/include/tga.h>
#include <cocogfx/include/png.h>
#include <cocogfx/include/tga.hpp>
#include <cocogfx/include/png.hpp>
using namespace cocogfx;

View file

@ -1,8 +1,8 @@
#include <cstdint>
#include <vector>
#include <bitmanip.h>
#include <cocogfx/include/format.h>
#include <cocogfx/include/blitter.h>
#include <cocogfx/include/format.hpp>
#include <cocogfx/include/blitter.hpp>
int LoadImage(const char *filename,
cocogfx::ePixelFormat format,

2
third_party/cocogfx vendored

@ -1 +1 @@
Subproject commit 1c6111c96cdb23c1286495be903501aff007cd75
Subproject commit ff85ba2bd69176a19e92390cca1ab9888fbbbb3e