8-bit lerp fix

This commit is contained in:
Blaise Tine 2023-03-14 18:12:11 -04:00
parent 2ec7fba3bf
commit d4e5367c18
8 changed files with 68 additions and 64 deletions

View file

@ -73,7 +73,7 @@ rcache()
do
echo -e "\n###############################################################################\n" >> $LOG_FILE
echo -e "$TEST mode=$mode" >> $LOG_FILE
CONFIGS="-DEXT_GFX_ENABLE $mode" ${VORTEX_HOME}/ci/blackbox.sh --driver=${DRIVER} --cores=${CORES} --threads=1 --app=draw3d --args="-onull -tvase.cgltrace -e -w${WIDTH} -h${HEIGHT}" --perf=4 >> $LOG_FILE
CONFIGS="-DEXT_GFX_ENABLE $mode" ${VORTEX_HOME}/ci/blackbox.sh --driver=${DRIVER} --cores=${CORES} --threads=1 --app=draw3d --args="-onull -tvase.cgltrace -w${WIDTH} -h${HEIGHT}" --perf=4 >> $LOG_FILE
done
}
@ -89,7 +89,7 @@ ocache()
do
echo -e "\n###############################################################################\n" >> $LOG_FILE
echo -e "$TEST mode=$mode" >> $LOG_FILE
CONFIGS="-DEXT_GFX_ENABLE $mode" ${VORTEX_HOME}/ci/blackbox.sh --driver=${DRIVER} --cores=${CORES} --threads=1 --app=draw3d --args="-onull -tcarnival.cgltrace -e -w${WIDTH} -h${HEIGHT}" --perf=5 >> $LOG_FILE
CONFIGS="-DEXT_GFX_ENABLE $mode" ${VORTEX_HOME}/ci/blackbox.sh --driver=${DRIVER} --cores=${CORES} --threads=1 --app=draw3d --args="-onull -tcarnival.cgltrace -w${WIDTH} -h${HEIGHT}" --perf=5 >> $LOG_FILE
done
}

View file

@ -175,24 +175,24 @@ static cocogfx::ColorARGB DoBlendMode(uint32_t mode,
std::abort();
case ROP_BLEND_MODE_ADD:
return cocogfx::ColorARGB(
cocogfx::Add8(cocogfx::Mul8(src.a, s.a), cocogfx::Mul8(dst.a, d.a)),
cocogfx::Add8(cocogfx::Mul8(src.r, s.r), cocogfx::Mul8(dst.r, d.r)),
cocogfx::Add8(cocogfx::Mul8(src.g, s.g), cocogfx::Mul8(dst.g, d.g)),
cocogfx::Add8(cocogfx::Mul8(src.b, s.b), cocogfx::Mul8(dst.b, d.b))
cocogfx::Div255(std::min<int>(src.a * s.a + dst.a * d.a, 255 * 255)),
cocogfx::Div255(std::min<int>(src.r * s.r + dst.r * d.r, 255 * 255)),
cocogfx::Div255(std::min<int>(src.g * s.g + dst.g * d.g, 255 * 255)),
cocogfx::Div255(std::min<int>(src.b * s.b + dst.b * d.b, 255 * 255))
);
case ROP_BLEND_MODE_SUB:
return cocogfx::ColorARGB(
cocogfx::Sub8(cocogfx::Mul8(src.a, s.a), cocogfx::Mul8(dst.a, d.a)),
cocogfx::Sub8(cocogfx::Mul8(src.r, s.r), cocogfx::Mul8(dst.r, d.r)),
cocogfx::Sub8(cocogfx::Mul8(src.g, s.g), cocogfx::Mul8(dst.g, d.g)),
cocogfx::Sub8(cocogfx::Mul8(src.b, s.b), cocogfx::Mul8(dst.b, d.b))
cocogfx::Div255(std::max<int>(src.a * s.a - dst.a * d.a, 0x0)),
cocogfx::Div255(std::max<int>(src.r * s.r - dst.r * d.r, 0x0)),
cocogfx::Div255(std::max<int>(src.g * s.g - dst.g * d.g, 0x0)),
cocogfx::Div255(std::max<int>(src.b * s.b - dst.b * d.b, 0x0))
);
case ROP_BLEND_MODE_REV_SUB:
return cocogfx::ColorARGB(
cocogfx::Sub8(cocogfx::Mul8(dst.a, d.a), cocogfx::Mul8(src.a, s.a)),
cocogfx::Sub8(cocogfx::Mul8(dst.r, d.r), cocogfx::Mul8(src.r, s.r)),
cocogfx::Sub8(cocogfx::Mul8(dst.g, d.g), cocogfx::Mul8(src.g, s.g)),
cocogfx::Sub8(cocogfx::Mul8(dst.b, d.b), cocogfx::Mul8(src.b, s.b))
cocogfx::Div255(std::max<int>(dst.a * d.a - src.a * s.a, 0x0)),
cocogfx::Div255(std::max<int>(dst.r * d.r - src.r * s.r, 0x0)),
cocogfx::Div255(std::max<int>(dst.g * d.g - src.g * s.g, 0x0)),
cocogfx::Div255(std::max<int>(dst.b * d.b - src.b * s.b, 0x0))
);
case ROP_BLEND_MODE_MIN:
return cocogfx::ColorARGB(

View file

@ -102,22 +102,24 @@ inline uint32_t Pack8888(uint32_t lo, uint32_t hi) {
}
// Linearly interpolates two packed 8-bit channels at once.
//
// `a` and `b` each hold two 8-bit channels in the lanes selected by the mask
// 0x00ff00ff (bits 0-7 and bits 16-23). `f` is the 8-bit blend weight:
// f == 0 yields `a`, f == 0xff yields `b`.
//
// Preconditions (not checked here; assumed to be guaranteed by callers):
//   - `a` and `b` have no bits set outside 0x00ff00ff
//   - `f` is in [0, 0xff]
//
// Per lane the result is round((a * (255 - f) + b * f) / 255).
inline uint32_t Lerp8888(uint32_t a, uint32_t b, uint32_t f) {
  // Weighted sum per lane. Each lane is at most 255 * 255 = 65025, and the
  // 0x80 rounding bias keeps it below 65536, so nothing carries into the
  // neighbouring lane. Unlike a (b - a) * f formulation there is no
  // subtraction, hence no cross-lane borrow when a channel of b is smaller
  // than the matching channel of a.
  uint32_t p = a * (0xff - f) + b * f + 0x00800080;
  // Exact division by 255 for biased 16-bit values: (p + (p >> 8)) >> 8.
  // Without the bias above, 255 * 255 would come out as 254 instead of 255.
  uint32_t q = (p >> 8) & 0x00ff00ff;
  return ((p + q) >> 8) & 0x00ff00ff;
}
template <uint32_t F, typename T = int32_t>
void TexAddressLinear(TFixed<F,T> fu,
TFixed<F,T> fv,
uint32_t log_width,
uint32_t log_height,
int wrapu,
int wrapv,
uint32_t* addr00,
uint32_t* addr01,
uint32_t* addr10,
uint32_t* addr11,
uint32_t* alpha,
uint32_t* beta
uint32_t log_width,
uint32_t log_height,
int wrapu,
int wrapv,
uint32_t* addr00,
uint32_t* addr01,
uint32_t* addr10,
uint32_t* addr11,
uint32_t* alpha,
uint32_t* beta
) {
auto delta_x = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_width);
auto delta_y = TFixed<F,T>::make(TFixed<F,T>::HALF >> log_height);
@ -143,8 +145,8 @@ void TexAddressLinear(TFixed<F,T> fu,
*addr10 = x0 + (y1 << log_width);
*addr11 = x1 + (y1 << log_width);
*alpha = x0s & 0xff;
*beta = y0s & 0xff;
*alpha = x0s & 0xff;
*beta = y0s & 0xff;
//printf("*** fu=0x%x, fv=0x%x, u0=0x%x, u1=0x%x, v0=0x%x, v1=0x%x, x0=0x%x, x1=0x%x, y0=0x%x, y1=0x%x, addr00=0x%x, addr01=0x%x, addr10=0x%x, addr11=0x%x\n", fu.data(), fv.data(), u0, u1, v0, v1, x0, x1, y0, y1, *addr00, *addr01, *addr10, *addr11);
}
@ -152,11 +154,11 @@ void TexAddressLinear(TFixed<F,T> fu,
template <uint32_t F, typename T = int32_t>
void TexAddressPoint(TFixed<F,T> fu,
TFixed<F,T> fv,
uint32_t log_width,
uint32_t log_height,
int wrapu,
int wrapv,
uint32_t* addr
uint32_t log_width,
uint32_t log_height,
int wrapu,
int wrapv,
uint32_t* addr
) {
uint32_t u = Clamp(fu, wrapu);
uint32_t v = Clamp(fv, wrapv);

View file

@ -18,7 +18,6 @@ class GpuSW;
typedef struct {
uint32_t log_num_tasks;
bool empty_shader;
bool depth_enabled;
bool color_enabled;
bool tex_enabled;

View file

@ -363,8 +363,7 @@ void shader_function_empty(int task_id, kernel_arg_t* __UNIFORM__ arg) {
int main() {
auto __UNIFORM__ arg = reinterpret_cast<kernel_arg_t*>(KERNEL_ARG_DEV_MEM_ADDR);
auto callback = arg->empty_shader ? (vx_spawn_tasks_cb)shader_function_empty :
(vx_spawn_tasks_cb)shader_function_hw;
auto callback = (vx_spawn_tasks_cb)shader_function_hw;
#ifdef SW_ENABLE
g_gpu_sw.configure(arg);

View file

@ -36,10 +36,12 @@ const char* reference_file = nullptr;
bool sw_rast = false;
bool sw_rop = false;
bool sw_interp = false;
bool empty_shader = false;
uint32_t start_draw = 0;
uint32_t end_draw = -1;
uint32_t clear_color = 0x00000000;
uint32_t clear_depth = 0xFFFFFFFF;
uint32_t clear_depth = 0xffffffff;
uint32_t dst_width = 128;
uint32_t dst_height = 128;
@ -67,16 +69,22 @@ uint32_t tileLogSize = RASTER_TILE_LOGSIZE;
// Prints the program banner followed by a single usage line to stdout.
//
// `-e` is listed once, as the end-draw index: the option parser in this file
// accepts it with an argument ("e:") and stores it in end_draw, so the former
// "[-e empty]" flag entry no longer applies and is omitted.
static void show_usage() {
  std::cout << "Vortex 3D Rendering Test." << std::endl;
  std::cout << "Usage: [-t trace] [-s startdraw] [-e enddraw] [-o output] [-r reference] [-w width] [-h height] [-x s/w rast] [-y s/w rop] [-z s/w interp] [-k tilelogsize]" << std::endl;
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "t:i:o:r:w:h:t:k:xyze?")) != -1) {
while ((c = getopt(argc, argv, "t:s:e:i:o:r:w:h:t:k:xyz?")) != -1) {
switch (c) {
case 't':
trace_file = optarg;
break;
case 's':
start_draw = std::atoi(optarg);
break;
case 'e':
end_draw = std::atoi(optarg);
break;
case 'o':
output_file = optarg;
break;
@ -89,9 +97,6 @@ static void parse_args(int argc, char **argv) {
case 'h':
dst_height = std::atoi(optarg);
break;
case 'e':
empty_shader = true;
break;
case 'x':
sw_rast = true;
break;
@ -158,13 +163,15 @@ int render(const CGLTrace& trace) {
std::cout << "render" << std::endl;
auto time_begin = std::chrono::high_resolution_clock::now();
uint32_t draw_idx = 0;
uint64_t instrs = 0;
uint64_t cycles = 0;
// render each draw call
for (auto& drawcall : trace.drawcalls) {
for (uint32_t d = 0, nd = trace.drawcalls.size(); d < nd; ++d) {
if (d < start_draw || d > end_draw)
continue;
auto& drawcall = trace.drawcalls.at(d);
auto& states = drawcall.states;
std::vector<uint8_t> tilebuf;
@ -181,8 +188,8 @@ int render(const CGLTrace& trace) {
if (primbuf_addr != 0) vx_mem_free(device, primbuf_addr);
RT_CHECK(vx_mem_alloc(device, tilebuf.size(), &tilebuf_addr));
RT_CHECK(vx_mem_alloc(device, primbuf.size(), &primbuf_addr));
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::endl;
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::endl;
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::dec << std::endl;
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::dec << std::endl;
uint32_t alloc_size = std::max({tilebuf.size(), primbuf.size()});
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
@ -306,7 +313,7 @@ int render(const CGLTrace& trace) {
// allocate texture memory
if (texbuf_addr != 0) vx_mem_free(device, texbuf_addr);
RT_CHECK(vx_mem_alloc(device, texbuf.size(), &texbuf_addr));
std::cout << "texbuf_addr=0x" << std::hex << texbuf_addr << std::endl;
std::cout << "texbuf_addr=0x" << std::hex << texbuf_addr << std::dec << std::endl;
// upload texture data
std::cout << "upload texture buffer" << std::endl;
@ -371,7 +378,7 @@ int render(const CGLTrace& trace) {
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Elapsed time: %lg ms\n", elapsed);
if (draw_idx < trace.drawcalls.size()-1) {
if (d < trace.drawcalls.size()-1) {
vx_dump_perf(device, stdout);
}
@ -381,8 +388,6 @@ int render(const CGLTrace& trace) {
RT_CHECK(vx_perf_counter(device, CSR_MINSTRET, -1, &instrs_));
cycles += cycles_;
instrs += instrs_;
++draw_idx;
}
// download destination buffer
@ -481,8 +486,8 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_mem_alloc(device, zbuf_size, &zbuf_addr));
RT_CHECK(vx_mem_alloc(device, cbuf_size, &cbuf_addr));
std::cout << "zbuf_addr=0x" << std::hex << zbuf_addr << std::endl;
std::cout << "cbuf_addr=0x" << std::hex << cbuf_addr << std::endl;
std::cout << "zbuf_addr=0x" << std::hex << zbuf_addr << std::dec << std::endl;
std::cout << "cbuf_addr=0x" << std::hex << cbuf_addr << std::dec << std::endl;
// allocate staging buffer
std::cout << "allocate staging buffer" << std::endl;
@ -514,7 +519,6 @@ int main(int argc, char *argv[]) {
// update kernel arguments
kernel_arg.log_num_tasks = log2ceil(num_tasks);
kernel_arg.empty_shader = empty_shader;
kernel_arg.sw_rast = sw_rast;
kernel_arg.sw_rop = sw_rop;
kernel_arg.sw_interp = sw_interp;
@ -538,14 +542,14 @@ int main(int argc, char *argv[]) {
cleanup();
if (reference_file) {
auto errors = CompareImages(output_file, reference_file, FORMAT_A8R8G8B8, 2);
auto errors = CompareImages(output_file, reference_file, FORMAT_A8R8G8B8, 0);
if (0 == errors) {
std::cout << "PASSED!" << std::endl;
} else {
std::cout << "FAILED!" << std::endl;
std::cout << "FAILED! " << errors << " errors." << std::endl;
return errors;
}
}
return 0;
}
}

View file

@ -129,8 +129,8 @@ int render(const CGLTrace& trace) {
if (primbuf_addr != 0) vx_mem_free(device, primbuf_addr);
RT_CHECK(vx_mem_alloc(device, tilebuf.size(), &tilebuf_addr));
RT_CHECK(vx_mem_alloc(device, primbuf.size(), &primbuf_addr));
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::endl;
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::endl;
std::cout << "tilebuf_addr=0x" << std::hex << tilebuf_addr << std::dec << std::endl;
std::cout << "primbuf_addr=0x" << std::hex << primbuf_addr << std::dec << std::endl;
uint32_t alloc_size = std::max({tilebuf.size(), primbuf.size(), sizeof(kernel_arg_t)});
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
@ -257,7 +257,7 @@ int main(int argc, char *argv[]) {
// allocate device memory
RT_CHECK(vx_mem_alloc(device, cbuf_size, &cbuf_addr));
std::cout << "cbuf_addr=0x" << std::hex << cbuf_addr << std::endl;
std::cout << "cbuf_addr=0x" << std::hex << cbuf_addr << std::dec << std::endl;
// allocate staging buffer
{
@ -301,7 +301,7 @@ int main(int argc, char *argv[]) {
if (0 == errors) {
std::cout << "PASSED!" << std::endl;
} else {
std::cout << "FAILED!" << std::endl;
std::cout << "FAILED! " << errors << " errors." << std::endl;
return errors;
}
}

View file

@ -219,8 +219,8 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_mem_alloc(device, src_bufsize, &src_addr));
RT_CHECK(vx_mem_alloc(device, dst_bufsize, &dst_addr));
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
std::cout << "src_addr=0x" << std::hex << src_addr << std::dec << std::endl;
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::dec << std::endl;
// allocate staging buffer
std::cout << "allocate staging buffer" << std::endl;
@ -300,7 +300,7 @@ int main(int argc, char *argv[]) {
if (0 == errors) {
std::cout << "PASSED!" << std::endl;
} else {
std::cout << "FAILED!" << std::endl;
std::cout << "FAILED! " << errors << " errors." << std::endl;
return errors;
}
}