mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
rasterizer simulator updates
This commit is contained in:
parent
bf23a282b1
commit
45150919e3
45 changed files with 389 additions and 230 deletions
|
@ -18,6 +18,7 @@ make -C tests/riscv/isa run-rtlsim
|
|||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -osoccer_result.png -rsoccer_ref_g1.png -g1"
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -osoccer_result.png -rsoccer_ref_g1.png -g1"
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=vlsim --app=tex --args="-isoccer.png -osoccer_result.png -rsoccer_ref_g1.png -g1"
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d
|
||||
|
||||
echo "smoke tests done!"
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ else
|
|||
CFLAGS += -march=rv64imfd -mabi=lp64d
|
||||
endif
|
||||
|
||||
CFLAGS += -O3 -mcmodel=medany -Wstack-usage=1024 -fno-exceptions -fdata-sections -ffunction-sections
|
||||
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I./include -I../hw
|
||||
|
||||
PROJECT = libvortexrt
|
||||
|
|
|
@ -106,9 +106,9 @@ extern "C" {
|
|||
})
|
||||
|
||||
// Interpolate
|
||||
#define vx_interp(f, a, b, c) ({ \
|
||||
#define vx_interp(a, b, c) ({ \
|
||||
unsigned __r; \
|
||||
__asm__ __volatile__ (".insn r4 0x2b, 2, %1, %0, %2, %3, %4" : "=r"(__r) : "i"(f), "r"(a), "r"(b), "r"(c)); \
|
||||
__asm__ __volatile__ (".insn r4 0x2b, 1, 1, %0, %1, %2, %3" : "=r"(__r) : "r"(a), "r"(b), "r"(c)); \
|
||||
__r; \
|
||||
})
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ Core::Core(const SimContext& ctx,
|
|||
const DCRS &dcrs,
|
||||
RasterUnit::Ptr raster_unit,
|
||||
RopUnit::Ptr rop_unit)
|
||||
: SimObject(ctx, "Core")
|
||||
: SimObject(ctx, "core")
|
||||
, MemRspPort(this)
|
||||
, MemReqPort(this)
|
||||
, id_(id)
|
||||
|
@ -78,7 +78,7 @@ Core::Core(const SimContext& ctx,
|
|||
DCACHE_MSHR_SIZE, // mshr
|
||||
4, // pipeline latency
|
||||
}))
|
||||
, sharedmem_(SharedMem::Create("smem", SharedMem::Config{
|
||||
, sharedmem_(SharedMem::Create("shared_mem", SharedMem::Config{
|
||||
uint32_t(SMEM_LOCAL_SIZE) * arch.num_warps() * arch.num_threads(),
|
||||
arch.num_threads(),
|
||||
arch.num_threads(),
|
||||
|
@ -95,12 +95,12 @@ Core::Core(const SimContext& ctx,
|
|||
warps_.at(i) = std::make_shared<Warp>(this, i);
|
||||
}
|
||||
|
||||
tex_unit_ = TexUnit::Create("tex", TexUnit::Config{
|
||||
tex_unit_ = TexUnit::Create("tex_unit", TexUnit::Config{
|
||||
1, // address latency
|
||||
2, // sampler latency
|
||||
}, this);
|
||||
raster_srv_ = RasterSrv::Create("rastersrv", this, raster_unit);
|
||||
rop_srv_ = RopSrv::Create("ropsrv", this, rop_unit);
|
||||
raster_srv_ = RasterSrv::Create("raster_srv", this, raster_unit);
|
||||
rop_srv_ = RopSrv::Create("rop_srv", this, rop_unit);
|
||||
|
||||
// register execute units
|
||||
exe_units_.at((int)ExeType::NOP) = SimPlatform::instance().create_object<NopUnit>(this);
|
||||
|
|
|
@ -373,12 +373,12 @@ static const char* op_string(const Instr &instr) {
|
|||
case 1: {
|
||||
switch (func2) {
|
||||
case 0: return "CMOV";
|
||||
case 1: return "INTERP";
|
||||
case 2: return "IMADD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case 2: return "INTERP";
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -322,6 +322,9 @@ void GpuUnit::tick() {
|
|||
case GpuType::ROP:
|
||||
rop_srv_->Input.send(trace, 1);
|
||||
break;
|
||||
case GpuType::INTERP:
|
||||
Output.send(trace, 6);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -1444,8 +1444,8 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
auto depth = rsdata[t][1].i;
|
||||
auto x_y = core_->raster_srv_->csr_read(id_, t, CSR_RASTER_X_Y);
|
||||
auto mask_pid = core_->raster_srv_->csr_read(id_, t, CSR_RASTER_MASK_PID);
|
||||
auto x = x_y & 0xffff;
|
||||
auto y = x_y > 16;
|
||||
auto x = x_y & 0xffff;
|
||||
auto y = x_y >> 16;
|
||||
auto mask = mask_pid & 0xf;
|
||||
core_->rop_srv_->write(x, y, mask, color, depth);
|
||||
}
|
||||
|
@ -1495,6 +1495,23 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
case 1: { // INTERP
|
||||
trace->exe_type = ExeType::GPU;
|
||||
trace->gpu_type = GpuType::INTERP;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
trace->used_iregs.set(rsrc2);
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
continue;
|
||||
auto a = rsdata[t][0].i;
|
||||
auto b = rsdata[t][1].i;
|
||||
auto c = rsdata[t][2].i;
|
||||
auto result = core_->raster_srv_->interpolate(id_, t, a, b, c);
|
||||
rddata[t].i = result;
|
||||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
case 2: { // IMADD
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->alu_type = AluType::IMADD;
|
||||
|
@ -1511,25 +1528,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
case 2: { // INTERP
|
||||
trace->exe_type = ExeType::GPU;
|
||||
trace->gpu_type = GpuType::INTERP;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
trace->used_iregs.set(rsrc2);
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
continue;
|
||||
auto q = func2;
|
||||
auto a = rsdata[t][0].i;
|
||||
auto b = rsdata[t][1].i;
|
||||
auto c = rsdata[t][2].i;
|
||||
auto result = core_->raster_srv_->interpolate(id_, t, q, a, b, c);
|
||||
rddata[t].i = result;
|
||||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -28,6 +28,12 @@ public:
|
|||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
|
||||
// create gpu blocks
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
raster_units_.at(i) = RasterUnit::Create("raster_unit", arch, dcrs_.raster_dcrs, RASTER_TILE_LOGSIZE, RASTER_BLOCK_LOGSIZE);
|
||||
rop_units_.at(i) = RopUnit::Create("rop_unit", arch, dcrs_.rop_dcrs);
|
||||
}
|
||||
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
auto j = i / cores_per_cluster;
|
||||
|
@ -128,10 +134,7 @@ public:
|
|||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
}
|
||||
|
||||
raster_units_.at(i) = RasterUnit::Create("raster", arch, dcrs_.raster_dcrs, RASTER_TILE_LOGSIZE, RASTER_BLOCK_LOGSIZE);
|
||||
rop_units_.at(i) = RopUnit::Create("rop", arch, dcrs_.rop_dcrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,22 +7,36 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
using fixed23_t = cocogfx::TFixed<23>;
|
||||
using fixed24_t = cocogfx::TFixed<24>;
|
||||
|
||||
using vec2_fx2_t = cocogfx::TVector2<fixed23_t>;
|
||||
using vec2_fx2_t = cocogfx::TVector2<fixed24_t>;
|
||||
|
||||
struct csr_t {
|
||||
uint32_t frag;
|
||||
RasterUnit::Stamp *stamp;
|
||||
class CSR {
|
||||
private:
|
||||
RasterUnit::Stamp *stamp_;
|
||||
|
||||
public:
|
||||
uint32_t frag;
|
||||
vec2_fx2_t gradients[4];
|
||||
|
||||
csr_t(RasterUnit::Stamp *stamp = nullptr)
|
||||
: stamp(stamp)
|
||||
CSR()
|
||||
: stamp_(nullptr)
|
||||
{}
|
||||
|
||||
~csr_t() {
|
||||
delete stamp;
|
||||
~CSR() {
|
||||
if (stamp_ )
|
||||
delete stamp_;
|
||||
}
|
||||
|
||||
void set_stamp(RasterUnit::Stamp *stamp) {
|
||||
if (stamp_ )
|
||||
delete stamp_;
|
||||
stamp_ = stamp;
|
||||
}
|
||||
|
||||
RasterUnit::Stamp* get_stamp() const {
|
||||
return stamp_;
|
||||
}
|
||||
};
|
||||
|
||||
class RasterSrv::Impl {
|
||||
|
@ -31,7 +45,7 @@ private:
|
|||
Core* core_;
|
||||
const Arch& arch_;
|
||||
RasterUnit::Ptr raster_unit_;
|
||||
std::vector<csr_t> csrs_;
|
||||
std::vector<CSR> csrs_;
|
||||
PerfStats perf_stats_;
|
||||
|
||||
public:
|
||||
|
@ -42,7 +56,7 @@ public:
|
|||
, core_(core)
|
||||
, arch_(core->arch())
|
||||
, raster_unit_(raster_unit)
|
||||
, csrs_(core->arch().num_cores() * core->arch().num_warps() * core->arch().num_threads())
|
||||
, csrs_(core->arch().num_warps() * core->arch().num_threads())
|
||||
{}
|
||||
|
||||
~Impl() {}
|
||||
|
@ -56,17 +70,20 @@ public:
|
|||
auto& csr = csrs_.at(ltid);
|
||||
switch (addr) {
|
||||
case CSR_RASTER_X_Y:
|
||||
return (csr.stamp->y << 16) | csr.stamp->x;
|
||||
return (csr.get_stamp()->y << 16) | csr.get_stamp()->x;
|
||||
case CSR_RASTER_MASK_PID:
|
||||
return (csr.stamp->mask << 4) | csr.stamp->pid;
|
||||
return (csr.get_stamp()->pid << 4) | csr.get_stamp()->mask;
|
||||
case CSR_RASTER_FRAG:
|
||||
return csr.frag;
|
||||
case CSR_RASTER_BCOORD_X:
|
||||
return csr.stamp->bcoords[csr.frag].x.data();
|
||||
printf("bcoord.x=%d\n", csr.get_stamp()->bcoords[csr.frag].x.data());
|
||||
return csr.get_stamp()->bcoords[csr.frag].x.data();
|
||||
case CSR_RASTER_BCOORD_Y:
|
||||
return csr.stamp->bcoords[csr.frag].y.data();
|
||||
printf("bcoord.y=%d\n", csr.get_stamp()->bcoords[csr.frag].y.data());
|
||||
return csr.get_stamp()->bcoords[csr.frag].y.data();
|
||||
case CSR_RASTER_BCOORD_Z:
|
||||
return csr.stamp->bcoords[csr.frag].z.data();
|
||||
printf("bcoord.z=%d\n", csr.get_stamp()->bcoords[csr.frag].z.data());
|
||||
return csr.get_stamp()->bcoords[csr.frag].z.data();
|
||||
case CSR_RASTER_GRAD_X:
|
||||
return csr.gradients[csr.frag].x.data();
|
||||
case CSR_RASTER_GRAD_Y:
|
||||
|
@ -85,10 +102,12 @@ public:
|
|||
csr.frag = value;
|
||||
break;
|
||||
case CSR_RASTER_GRAD_X:
|
||||
csr.gradients[csr.frag].x = fixed23_t::make(value);
|
||||
csr.gradients[csr.frag].x = fixed24_t::make(value);
|
||||
printf("grad.x=%d\n", csr.gradients[csr.frag].x.data());
|
||||
break;
|
||||
case CSR_RASTER_GRAD_Y:
|
||||
csr.gradients[csr.frag].y = fixed23_t::make(value);
|
||||
csr.gradients[csr.frag].y = fixed24_t::make(value);
|
||||
printf("grad.y=%d\n", csr.gradients[csr.frag].y.data());
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
|
@ -101,26 +120,31 @@ public:
|
|||
return 0;
|
||||
uint32_t ltid = wid * arch_.num_threads() + tid;
|
||||
auto& csr = csrs_.at(ltid);
|
||||
if (csr.stamp) {
|
||||
delete csr.stamp;
|
||||
}
|
||||
csr.stamp = stamp;
|
||||
csr.set_stamp(stamp);
|
||||
return (stamp->pid << 1) | 1;
|
||||
}
|
||||
|
||||
int32_t interpolate(uint32_t wid, uint32_t tid,
|
||||
uint32_t quad, int32_t a, int32_t b, int32_t c) {
|
||||
int32_t interpolate(uint32_t wid, uint32_t tid, int32_t a, int32_t b, int32_t c) {
|
||||
uint32_t ltid = wid * arch_.num_threads() + tid;
|
||||
auto& csr = csrs_.at(ltid);
|
||||
auto afx = fixed23_t::make(a);
|
||||
auto bfx = fixed23_t::make(b);
|
||||
auto cfx = fixed23_t::make(c);
|
||||
auto out = cocogfx::Dot<fixed23_t>(afx, csr.gradients[quad].x, bfx, csr.gradients[quad].y) + cfx;
|
||||
auto ax = fixed24_t::make(a);
|
||||
auto bx = fixed24_t::make(b);
|
||||
auto cx = fixed24_t::make(c);
|
||||
auto out = cocogfx::Dot<fixed24_t>(ax, csr.gradients[csr.frag].x, bx, csr.gradients[csr.frag].y) + cx;
|
||||
return out.data();
|
||||
}
|
||||
|
||||
void tick() {
|
||||
//--
|
||||
// check input queue
|
||||
if (simobject_->Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = simobject_->Input.front();
|
||||
|
||||
simobject_->Output.send(trace, 1);
|
||||
|
||||
auto time = simobject_->Input.pop();
|
||||
perf_stats_.stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
|
@ -160,9 +184,8 @@ uint32_t RasterSrv::fetch(uint32_t wid, uint32_t tid) {
|
|||
return impl_->fetch(wid, tid);
|
||||
}
|
||||
|
||||
int32_t RasterSrv::interpolate(uint32_t wid, uint32_t tid,
|
||||
uint32_t q, int32_t a, int32_t b, int32_t c) {
|
||||
return impl_->interpolate(wid, tid, q, a, b, c);
|
||||
int32_t RasterSrv::interpolate(uint32_t wid, uint32_t tid, int32_t a, int32_t b, int32_t c) {
|
||||
return impl_->interpolate(wid, tid, a, b, c);
|
||||
}
|
||||
|
||||
void RasterSrv::tick() {
|
||||
|
|
|
@ -13,10 +13,10 @@ class RasterUnit;
|
|||
class RasterSrv : public SimObject<RasterSrv> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
: stalls(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -38,7 +38,7 @@ public:
|
|||
|
||||
uint32_t fetch(uint32_t wid, uint32_t tid);
|
||||
|
||||
int32_t interpolate(uint32_t wid, uint32_t tid, uint32_t q, int32_t a, int32_t b, int32_t c);
|
||||
int32_t interpolate(uint32_t wid, uint32_t tid, int32_t a, int32_t b, int32_t c);
|
||||
|
||||
void tick();
|
||||
|
||||
|
|
|
@ -1,22 +1,24 @@
|
|||
#include "raster_unit.h"
|
||||
#include "core.h"
|
||||
#include <VX_config.h>
|
||||
#include "mempool.h"
|
||||
#include <cocogfx/include/fixed.hpp>
|
||||
#include <cocogfx/include/math.hpp>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#define STAMP_POOL_MAX_SIZE 1024
|
||||
|
||||
using fixed16_t = cocogfx::TFixed<16>;
|
||||
using fixed23_t = cocogfx::TFixed<23>;
|
||||
// Fixed-point type with 24 fractional bits; the template argument must agree
// with the alias name (it was previously left at 23 after the rename).
using fixed24_t = cocogfx::TFixed<24>;
|
||||
|
||||
using vec2_fx_t = cocogfx::TVector2<fixed16_t>;
|
||||
using vec3_fx_t = cocogfx::TVector3<fixed16_t>;
|
||||
|
||||
using vec2_fx2_t = cocogfx::TVector2<fixed23_t>;
|
||||
using vec3_fx2_t = cocogfx::TVector3<fixed23_t>;
|
||||
using vec2_fx2_t = cocogfx::TVector2<fixed24_t>;
|
||||
using vec3_fx2_t = cocogfx::TVector3<fixed24_t>;
|
||||
|
||||
using rect_u_t = cocogfx::TRect<uint32_t>;
|
||||
|
||||
struct primitive_t {
|
||||
vec3_fx_t edges[3];
|
||||
fixed16_t extents[3];
|
||||
|
@ -55,7 +57,8 @@ private:
|
|||
uint32_t num_prims_;
|
||||
uint32_t cur_tile_;
|
||||
uint32_t cur_prim_;
|
||||
std::list<RasterUnit::Stamp*> stamp_queue_;
|
||||
std::queue<RasterUnit::Stamp*> stamp_queue_;
|
||||
MemoryPool<RasterUnit::Stamp> stamp_allocator_;
|
||||
bool initialized_;
|
||||
|
||||
void renderQuad(const primitive_t& primitive,
|
||||
|
@ -64,6 +67,7 @@ private:
|
|||
fixed16_t e0,
|
||||
fixed16_t e1,
|
||||
fixed16_t e2) {
|
||||
printf("Quad (%d,%d) :\n", x, y);
|
||||
RasterUnit::Stamp stamp;
|
||||
stamp.x = x;
|
||||
stamp.y = y;
|
||||
|
@ -81,7 +85,7 @@ private:
|
|||
stamp.mask |= (1 << f);
|
||||
stamp.bcoords[f].x = ee0;
|
||||
stamp.bcoords[f].y = ee1;
|
||||
stamp.bcoords[f].x = ee2;
|
||||
stamp.bcoords[f].z = ee2;
|
||||
}
|
||||
// update edge equation x components
|
||||
ee0 += primitive.edges[0].x;
|
||||
|
@ -96,17 +100,17 @@ private:
|
|||
|
||||
// submit stamp
|
||||
if (stamp.mask) {
|
||||
stamp_queue_.push_back(new RasterUnit::Stamp(stamp));
|
||||
stamp_queue_.push(new RasterUnit::Stamp(stamp));
|
||||
}
|
||||
}
|
||||
|
||||
void renderBlock(uint32_t subBlockLogSize,
|
||||
const primitive_t& primitive,
|
||||
uint32_t x,
|
||||
uint32_t y,
|
||||
fixed16_t e0,
|
||||
fixed16_t e1,
|
||||
fixed16_t e2) {
|
||||
const primitive_t& primitive,
|
||||
uint32_t x,
|
||||
uint32_t y,
|
||||
fixed16_t e0,
|
||||
fixed16_t e1,
|
||||
fixed16_t e2) {
|
||||
// check if block overlap triangle
|
||||
if ((e0 + (primitive.extents[0] << subBlockLogSize)) < fxZero
|
||||
|| (e1 + (primitive.extents[1] << subBlockLogSize)) < fxZero
|
||||
|
@ -114,6 +118,8 @@ private:
|
|||
return;
|
||||
|
||||
if (subBlockLogSize > 1) {
|
||||
//printf("Block (%d,%d) :\n", x, y);
|
||||
|
||||
--subBlockLogSize;
|
||||
auto subBlockSize = 1 << subBlockLogSize;
|
||||
// draw top-left subtile
|
||||
|
@ -175,6 +181,8 @@ private:
|
|||
return;
|
||||
|
||||
if (subTileLogSize > block_logsize_) {
|
||||
//if (subTileLogSize == tile_logsize_) printf("Tile (%d,%d) :\n", x, y);
|
||||
|
||||
--subTileLogSize;
|
||||
auto subTileSize = 1 << subTileLogSize;
|
||||
// draw top-left subtile
|
||||
|
@ -228,10 +236,11 @@ private:
|
|||
}
|
||||
|
||||
void initialize() {
|
||||
num_tiles_ = dcrs_.at(DCR_RASTER_TILE_COUNT);
|
||||
tbuf_baseaddr_ = dcrs_.at(DCR_RASTER_TBUF_ADDR);
|
||||
pbuf_baseaddr_ = dcrs_.at(DCR_RASTER_PBUF_ADDR);
|
||||
pbuf_stride_ = dcrs_.at(DCR_RASTER_PBUF_STRIDE);
|
||||
// get device configuration
|
||||
num_tiles_ = dcrs_.at(RASTER_STATE_TILE_COUNT);
|
||||
tbuf_baseaddr_ = dcrs_.at(RASTER_STATE_TBUF_ADDR);
|
||||
pbuf_baseaddr_ = dcrs_.at(RASTER_STATE_PBUF_ADDR);
|
||||
pbuf_stride_ = dcrs_.at(RASTER_STATE_PBUF_STRIDE);
|
||||
|
||||
tbuf_addr_ = tbuf_baseaddr_;
|
||||
cur_tile_ = 0;
|
||||
|
@ -241,8 +250,7 @@ private:
|
|||
initialized_ = true;
|
||||
}
|
||||
|
||||
void renderNextPrimitive() {
|
||||
assert(cur_prim_ < num_prims_);
|
||||
void renderNextPrimitive() {
|
||||
// get current tile header
|
||||
if (0 == num_prims_) {
|
||||
mem_->read(&tile_xy_, tbuf_addr_, 4);
|
||||
|
@ -256,7 +264,7 @@ private:
|
|||
mem_->read(&cur_prim_, tbuf_addr_, 4);
|
||||
tbuf_addr_ += 4;
|
||||
|
||||
// get primitive edges and bbox
|
||||
// get primitive edges
|
||||
primitive_t primitive;
|
||||
auto pbuf_addr = pbuf_baseaddr_ + cur_prim_ * pbuf_stride_;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
@ -268,8 +276,8 @@ private:
|
|||
pbuf_addr += 4;
|
||||
}
|
||||
|
||||
uint32_t tx = tile_xy_ & 0xffff;
|
||||
uint32_t ty = tile_xy_ >> 16;
|
||||
uint32_t tx = (tile_xy_ & 0xffff) << tile_logsize_;
|
||||
uint32_t ty = (tile_xy_ >> 16) << tile_logsize_;
|
||||
|
||||
// Add tile corner edge offsets
|
||||
primitive.extents[0] = calcEdgeExtents(primitive.edges[0]);
|
||||
|
@ -291,6 +299,7 @@ private:
|
|||
// Advance next primitive
|
||||
++cur_prim_;
|
||||
if (cur_prim_ == num_prims_) {
|
||||
cur_prim_ = 0;
|
||||
num_prims_ = 0;
|
||||
++cur_tile_;
|
||||
}
|
||||
|
@ -305,6 +314,7 @@ public:
|
|||
, dcrs_(dcrs)
|
||||
, tile_logsize_(tile_logsize)
|
||||
, block_logsize_(block_logsize)
|
||||
, stamp_allocator_(STAMP_POOL_MAX_SIZE)
|
||||
, initialized_(false) {
|
||||
assert(block_logsize >= 1);
|
||||
assert(tile_logsize >= block_logsize);
|
||||
|
@ -332,7 +342,7 @@ public:
|
|||
this->renderNextPrimitive();
|
||||
}
|
||||
auto stamp = stamp_queue_.front();
|
||||
stamp_queue_.pop_front();
|
||||
stamp_queue_.pop();
|
||||
return stamp;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,14 +22,16 @@ public:
|
|||
uint32_t pid;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
, stalls(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
||||
class DCRS {
|
||||
private:
|
||||
std::array<uint32_t, RASTER_STATE_COUNT> states_;
|
||||
|
|
|
@ -52,7 +52,16 @@ public:
|
|||
}
|
||||
|
||||
void tick() {
|
||||
//--
|
||||
// check input queue
|
||||
if (simobject_->Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = simobject_->Input.front();
|
||||
|
||||
simobject_->Output.send(trace, 1);
|
||||
|
||||
auto time = simobject_->Input.pop();
|
||||
perf_stats_.stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
|
|
|
@ -13,10 +13,10 @@ class RopUnit;
|
|||
class RopSrv : public SimObject<RopSrv> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
: stalls(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
using fixed23_t = cocogfx::TFixed<23>;
|
||||
// Fixed-point type with 24 fractional bits; the template argument must agree
// with the alias name (it was previously left at 23 after the rename).
// NOTE(review): fixed24_t::MASK is used below in place of the 0xffffff depth
// mask; presumably MASK spans the fractional bits — confirm against cocogfx.
using fixed24_t = cocogfx::TFixed<24>;
|
||||
|
||||
static bool DoCompare(uint32_t func, uint32_t a, uint32_t b) {
|
||||
switch (func) {
|
||||
|
@ -243,44 +243,52 @@ private:
|
|||
bool initialized_;
|
||||
|
||||
void initialize() {
|
||||
buf_baseaddr_ = dcrs_.at(DCR_ROP_ZBUF_ADDR);
|
||||
buf_pitch_ = dcrs_.at(DCR_ROP_ZBUF_PITCH);
|
||||
depth_func_ = dcrs_.at(DCR_ROP_DEPTH_FUNC);
|
||||
depth_mask_ = dcrs_.at(DCR_ROP_DEPTH_MASK);
|
||||
stencil_front_func_ = dcrs_.at(DCR_ROP_STENCIL_FUNC) & 0xffff;
|
||||
stencil_front_zpass_= dcrs_.at(DCR_ROP_STENCIL_ZPASS) & 0xffff;
|
||||
stencil_front_zfail_= dcrs_.at(DCR_ROP_STENCIL_ZFAIL) & 0xffff;
|
||||
stencil_front_fail_ = dcrs_.at(DCR_ROP_STENCIL_FAIL) & 0xffff;
|
||||
stencil_front_mask_ = dcrs_.at(DCR_ROP_STENCIL_MASK) & 0xffff;
|
||||
stencil_front_ref_ = dcrs_.at(DCR_ROP_STENCIL_REF) & 0xffff;
|
||||
stencil_back_func_ = dcrs_.at(DCR_ROP_STENCIL_FUNC) >> 16;
|
||||
stencil_back_zpass_ = dcrs_.at(DCR_ROP_STENCIL_ZPASS) >> 16;
|
||||
stencil_back_zfail_ = dcrs_.at(DCR_ROP_STENCIL_ZFAIL) >> 16;
|
||||
stencil_back_fail_ = dcrs_.at(DCR_ROP_STENCIL_FAIL) >> 16;
|
||||
stencil_back_mask_ = dcrs_.at(DCR_ROP_STENCIL_MASK) >> 16;
|
||||
stencil_back_ref_ = dcrs_.at(DCR_ROP_STENCIL_REF) >> 16;
|
||||
depth_enabled_ = (depth_func_ != ROP_DEPTH_FUNC_ALWAYS) || (depth_mask_ != 0);
|
||||
stencil_front_enabled_ = (stencil_front_func_ != ROP_DEPTH_FUNC_ALWAYS) || depth_enabled_;
|
||||
stencil_back_enabled_ = (stencil_back_func_ != ROP_DEPTH_FUNC_ALWAYS) || depth_enabled_;
|
||||
// get device configuration
|
||||
buf_baseaddr_ = dcrs_.at(ROP_STATE_ZBUF_ADDR);
|
||||
buf_pitch_ = dcrs_.at(ROP_STATE_ZBUF_PITCH);
|
||||
depth_func_ = dcrs_.at(ROP_STATE_DEPTH_FUNC);
|
||||
depth_mask_ = dcrs_.at(ROP_STATE_DEPTH_MASK);
|
||||
stencil_front_func_ = dcrs_.at(ROP_STATE_STENCIL_FUNC) & 0xffff;
|
||||
stencil_front_zpass_= dcrs_.at(ROP_STATE_STENCIL_ZPASS) & 0xffff;
|
||||
stencil_front_zfail_= dcrs_.at(ROP_STATE_STENCIL_ZFAIL) & 0xffff;
|
||||
stencil_front_fail_ = dcrs_.at(ROP_STATE_STENCIL_FAIL) & 0xffff;
|
||||
stencil_front_mask_ = dcrs_.at(ROP_STATE_STENCIL_MASK) & 0xffff;
|
||||
stencil_front_ref_ = dcrs_.at(ROP_STATE_STENCIL_REF) & 0xffff;
|
||||
stencil_back_func_ = dcrs_.at(ROP_STATE_STENCIL_FUNC) >> 16;
|
||||
stencil_back_zpass_ = dcrs_.at(ROP_STATE_STENCIL_ZPASS) >> 16;
|
||||
stencil_back_zfail_ = dcrs_.at(ROP_STATE_STENCIL_ZFAIL) >> 16;
|
||||
stencil_back_fail_ = dcrs_.at(ROP_STATE_STENCIL_FAIL) >> 16;
|
||||
stencil_back_mask_ = dcrs_.at(ROP_STATE_STENCIL_MASK) >> 16;
|
||||
stencil_back_ref_ = dcrs_.at(ROP_STATE_STENCIL_REF) >> 16;
|
||||
|
||||
depth_enabled_ = !((depth_func_ == ROP_DEPTH_FUNC_ALWAYS) && !depth_mask_);
|
||||
|
||||
stencil_front_enabled_ = !((stencil_front_func_ == ROP_DEPTH_FUNC_ALWAYS)
|
||||
&& (stencil_front_zpass_ == ROP_STENCIL_OP_KEEP)
|
||||
&& (stencil_front_zfail_ == ROP_STENCIL_OP_KEEP));
|
||||
|
||||
stencil_back_enabled_ = !((stencil_back_func_ == ROP_DEPTH_FUNC_ALWAYS)
|
||||
&& (stencil_back_zpass_ == ROP_STENCIL_OP_KEEP)
|
||||
&& (stencil_back_zfail_ == ROP_STENCIL_OP_KEEP));
|
||||
initialized_ = true;
|
||||
}
|
||||
|
||||
uint32_t doDepthTest(uint32_t x, uint32_t y, uint32_t mask, uint32_t depth) {
|
||||
uint32_t result_mask = 0;
|
||||
uint32_t depth_ref = depth & fixed23_t::MASK;
|
||||
uint32_t depth_ref = depth & fixed24_t::MASK;
|
||||
|
||||
for (uint32_t j = 0; j < 2; ++j) {
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
uint32_t f = j * 2 + i;
|
||||
if (mask & (1 << f)) {
|
||||
uint32_t stored_value;
|
||||
uint32_t buf_addr = buf_baseaddr_ + y * buf_pitch_ + x * 4;
|
||||
uint32_t buf_addr = buf_baseaddr_ + (y + j) * buf_pitch_ + (x + i) * 4;
|
||||
mem_->read(&stored_value, buf_addr, 4);
|
||||
uint32_t depth_val = stored_value & 0xffffff;
|
||||
uint32_t depth_val = stored_value & fixed24_t::MASK;
|
||||
auto passed = DoCompare(depth_func_, depth_ref, depth_val);
|
||||
if (passed) {
|
||||
if (depth_mask_) {
|
||||
auto write_value = (stored_value & ~0xffffff) | (depth_ref & 0xffffff);
|
||||
auto write_value = (stored_value & ~fixed24_t::MASK) | (depth_ref & fixed24_t::MASK);
|
||||
mem_->write(&write_value, buf_addr, 4);
|
||||
}
|
||||
result_mask |= (1 << f);
|
||||
|
@ -293,7 +301,7 @@ private:
|
|||
|
||||
uint32_t doStencilTest(uint32_t x, uint32_t y, uint32_t mask, uint32_t face, uint32_t depth) {
|
||||
uint32_t result_mask = 0;
|
||||
auto depth_ref = depth & fixed23_t::MASK;
|
||||
auto depth_ref = depth & fixed24_t::MASK;
|
||||
auto stencil_func = face ? stencil_back_func_ : stencil_front_func_;
|
||||
auto stencil_mask = face ? stencil_back_mask_ : stencil_front_mask_;
|
||||
auto stencil_ref = face ? stencil_back_ref_ : stencil_front_ref_;
|
||||
|
@ -304,7 +312,7 @@ private:
|
|||
uint32_t f = j * 2 + i;
|
||||
if (mask & (1 << f)) {
|
||||
uint32_t stored_value;
|
||||
uint32_t buf_addr = buf_baseaddr_ + y * buf_pitch_ + x * 4;
|
||||
uint32_t buf_addr = buf_baseaddr_ + (y + j) * buf_pitch_ + (x + i) * 4;
|
||||
mem_->read(&stored_value, buf_addr, 4);
|
||||
uint32_t stencil_val = stored_value >> 24;
|
||||
uint32_t depth_val = stored_value & 0xffffff;
|
||||
|
@ -393,17 +401,18 @@ private:
|
|||
bool initialized_;
|
||||
|
||||
void initialize() {
|
||||
buf_baseaddr_ = dcrs_.at(DCR_ROP_CBUF_ADDR);
|
||||
buf_pitch_ = dcrs_.at(DCR_ROP_CBUF_PITCH);
|
||||
write_mask_ = dcrs_.at(DCR_ROP_CBUF_MASK);
|
||||
blend_mode_rgb_ = dcrs_.at(DCR_ROP_BLEND_MODE) & 0xffff;
|
||||
blend_mode_a_ = dcrs_.at(DCR_ROP_BLEND_MODE) >> 16;
|
||||
blend_src_rgb_ = (dcrs_.at(DCR_ROP_BLEND_FUNC) >> 0) & 0xff;
|
||||
blend_src_a_ = (dcrs_.at(DCR_ROP_BLEND_FUNC) >> 8) & 0xff;
|
||||
blend_dst_rgb_ = (dcrs_.at(DCR_ROP_BLEND_FUNC) >> 16) & 0xff;
|
||||
blend_dst_a_ = (dcrs_.at(DCR_ROP_BLEND_FUNC) >> 24) & 0xff;
|
||||
blend_const_ = dcrs_.at(DCR_ROP_BLEND_CONST);
|
||||
logic_op_ = dcrs_.at(DCR_ROP_LOGIC_OP);
|
||||
// get device configuration
|
||||
buf_baseaddr_ = dcrs_.at(ROP_STATE_CBUF_ADDR);
|
||||
buf_pitch_ = dcrs_.at(ROP_STATE_CBUF_PITCH);
|
||||
write_mask_ = dcrs_.at(ROP_STATE_CBUF_MASK);
|
||||
blend_mode_rgb_ = dcrs_.at(ROP_STATE_BLEND_MODE) & 0xffff;
|
||||
blend_mode_a_ = dcrs_.at(ROP_STATE_BLEND_MODE) >> 16;
|
||||
blend_src_rgb_ = (dcrs_.at(ROP_STATE_BLEND_FUNC) >> 0) & 0xff;
|
||||
blend_src_a_ = (dcrs_.at(ROP_STATE_BLEND_FUNC) >> 8) & 0xff;
|
||||
blend_dst_rgb_ = (dcrs_.at(ROP_STATE_BLEND_FUNC) >> 16) & 0xff;
|
||||
blend_dst_a_ = (dcrs_.at(ROP_STATE_BLEND_FUNC) >> 24) & 0xff;
|
||||
blend_const_ = dcrs_.at(ROP_STATE_BLEND_CONST);
|
||||
logic_op_ = dcrs_.at(ROP_STATE_LOGIC_OP);
|
||||
initialized_ = true;
|
||||
}
|
||||
|
||||
|
@ -444,7 +453,7 @@ public:
|
|||
uint32_t f = j * 2 + i;
|
||||
if (mask & (1 << f)) {
|
||||
uint32_t stored_value;
|
||||
uint32_t buf_addr = buf_baseaddr_ + y * buf_pitch_ + x * 4;
|
||||
uint32_t buf_addr = buf_baseaddr_ + (y + j) * buf_pitch_ + (x + i) * 4;
|
||||
mem_->read(&stored_value, buf_addr, 4);
|
||||
cocogfx::ColorARGB src(color);
|
||||
cocogfx::ColorARGB dst(stored_value);
|
||||
|
|
|
@ -11,11 +11,15 @@ class Core;
|
|||
|
||||
class RopUnit : public SimObject<RopUnit> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t writes;
|
||||
uint64_t stalls;
|
||||
|
||||
PerfStats()
|
||||
: reads(0)
|
||||
, writes(0)
|
||||
, stalls(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
|||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
|
||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex -float-abi=hard -code-model=small"
|
||||
K_CFLAGS += "-v -O3 -Wstack-usage=1024 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -3,6 +3,7 @@ XLEN ?= 32
|
|||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
LLVM_PREFIX ?= /opt/llvm-riscv
|
||||
|
||||
OPTS ?= -g1
|
||||
|
||||
|
@ -11,7 +12,12 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -std=c++17 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
#VX_CC = ${LLVM_PREFIX}/bin/clang
|
||||
#VX_CXX = ${LLVM_PREFIX}/bin/clang++
|
||||
#VX_DP = ${LLVM_PREFIX}/bin/llvm-objdump
|
||||
#VX_CP = ${LLVM_PREFIX}/bin/llvm-objcopy
|
||||
|
||||
VX_CFLAGS += -std=c++17 -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
@ -22,7 +28,7 @@ CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
|||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex $(VORTEX_RT_PATH)/../third_party/cocogfx/libcocogfx.a -lz
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex $(VORTEX_RT_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -9,12 +9,12 @@
|
|||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
using fixed16_t = cocogfx::TFixed<16>;
|
||||
using fixed23_t = cocogfx::TFixed<23>;
|
||||
using fixed24_t = cocogfx::TFixed<24>;
|
||||
|
||||
typedef struct {
|
||||
fixed23_t x;
|
||||
fixed23_t y;
|
||||
fixed23_t z;
|
||||
fixed24_t x;
|
||||
fixed24_t y;
|
||||
fixed24_t z;
|
||||
} rast_attrib_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -46,8 +46,8 @@ typedef struct {
|
|||
} rast_prim_t;
|
||||
|
||||
typedef struct {
|
||||
fixed23_t dx;
|
||||
fixed23_t dy;
|
||||
fixed24_t dx;
|
||||
fixed24_t dy;
|
||||
} rast_grad_t;
|
||||
|
||||
struct fragment_t {
|
||||
|
@ -63,12 +63,13 @@ typedef struct {
|
|||
} rast_tile_header_t;
|
||||
|
||||
typedef struct {
|
||||
bool tex_enabled;
|
||||
uint32_t prim_addr;
|
||||
uint32_t dst_addr;
|
||||
uint32_t dst_width;
|
||||
uint32_t dst_height;
|
||||
uint8_t dst_stride;
|
||||
uint32_t dst_pitch;
|
||||
uint32_t dst_pitch;
|
||||
} kernel_arg_t;
|
||||
|
||||
#endif
|
|
@ -10,11 +10,11 @@
|
|||
auto cx = fixed16_t::make(csr_read(CSR_RASTER_BCOORD_X)); \
|
||||
auto cy = fixed16_t::make(csr_read(CSR_RASTER_BCOORD_Y)); \
|
||||
auto cz = fixed16_t::make(csr_read(CSR_RASTER_BCOORD_Z)); \
|
||||
auto r = cocogfx::Inverse<fixed23_t>(cx + cy + cz); \
|
||||
auto gx = cocogfx::Mul<fixed23_t>(cx, r); \
|
||||
auto gy = cocogfx::Mul<fixed23_t>(cy, r); \
|
||||
auto r = cocogfx::Inverse<fixed24_t>(cx + cy + cz); \
|
||||
auto gx = cocogfx::Mul<fixed24_t>(cx, r); \
|
||||
auto gy = cocogfx::Mul<fixed24_t>(cy, r); \
|
||||
csr_write(CSR_RASTER_GRAD_X, gx.data()); \
|
||||
csr_write(CSR_RASTER_GRAD_Y, gx.data()); \
|
||||
csr_write(CSR_RASTER_GRAD_Y, gy.data()); \
|
||||
}
|
||||
|
||||
#define GRADIENTS \
|
||||
|
@ -23,11 +23,15 @@
|
|||
GRADIENTS_i(2) \
|
||||
GRADIENTS_i(3) \
|
||||
|
||||
#define INTERPOLATE_i(i, dst, src) \
|
||||
csr_write(CSR_RASTER_FRAG, i); \
|
||||
dst[i] = fixed24_t::make(vx_interp(src.x.data(), src.y.data(), src.z.data()))
|
||||
|
||||
#define INTERPOLATE(dst, src) \
|
||||
dst[0] = fixed23_t::make(vx_interp(0, src.x.data(), src.y.data(), src.z.data())); \
|
||||
dst[1] = fixed23_t::make(vx_interp(1, src.x.data(), src.y.data(), src.z.data())); \
|
||||
dst[2] = fixed23_t::make(vx_interp(2, src.x.data(), src.y.data(), src.z.data())); \
|
||||
dst[3] = fixed23_t::make(vx_interp(3, src.x.data(), src.y.data(), src.z.data()))
|
||||
INTERPOLATE_i(0, dst, src); \
|
||||
INTERPOLATE_i(1, dst, src); \
|
||||
INTERPOLATE_i(2, dst, src); \
|
||||
INTERPOLATE_i(3, dst, src);
|
||||
|
||||
#define TEXTURING(dst, u, v) \
|
||||
dst[0] = vx_tex(0, u[0].data(), v[0].data(), 0); \
|
||||
|
@ -47,6 +51,18 @@
|
|||
MODULATE_i(2, dst, in1, in2_r, in2_g, in2_b, in2_a); \
|
||||
MODULATE_i(3, dst, in1, in2_r, in2_g, in2_b, in2_a)
|
||||
|
||||
#define TO_RGBA_i(i, dst, src_r, src_g, src_b, src_a) \
|
||||
dst[i].r = static_cast<uint8_t>((src_r[i].data() * 255) >> fixed24_t::FRAC); \
|
||||
dst[i].g = static_cast<uint8_t>((src_g[i].data() * 255) >> fixed24_t::FRAC); \
|
||||
dst[i].b = static_cast<uint8_t>((src_b[i].data() * 255) >> fixed24_t::FRAC); \
|
||||
dst[i].a = static_cast<uint8_t>((src_a[i].data() * 255) >> fixed24_t::FRAC); \
|
||||
|
||||
#define TO_RGBA(dst, src_r, src_g, src_b, src_a) \
|
||||
TO_RGBA_i(0, dst, src_r, src_g, src_b, src_a); \
|
||||
TO_RGBA_i(1, dst, src_r, src_g, src_b, src_a); \
|
||||
TO_RGBA_i(2, dst, src_r, src_g, src_b, src_a); \
|
||||
TO_RGBA_i(3, dst, src_r, src_g, src_b, src_a)
|
||||
|
||||
#define OUTPUT(color, z) \
|
||||
vx_rop(color[0].value, z[0].data()); \
|
||||
vx_rop(color[1].value, z[1].data()); \
|
||||
|
@ -55,7 +71,7 @@
|
|||
|
||||
void shader_function(int task_id, kernel_arg_t* kernel_arg) {
|
||||
auto prim_ptr = (rast_prim_t*)kernel_arg->prim_addr;
|
||||
fixed23_t z[4], r[4], g[4], b[4], a[4], u[4], v[4];
|
||||
fixed24_t z[4], r[4], g[4], b[4], a[4], u[4], v[4];
|
||||
cocogfx::ColorARGB tex_color[4], out_color[4];
|
||||
|
||||
for (;;) {
|
||||
|
@ -77,9 +93,12 @@ void shader_function(int task_id, kernel_arg_t* kernel_arg) {
|
|||
INTERPOLATE(u, attribs.u);
|
||||
INTERPOLATE(v, attribs.v);
|
||||
|
||||
TEXTURING(tex_color, u, v);
|
||||
|
||||
MODULATE(out_color, tex_color, r, g, b, a);
|
||||
if (kernel_arg->tex_enabled) {
|
||||
TEXTURING(tex_color, u, v);
|
||||
MODULATE(out_color, tex_color, r, g, b, a);
|
||||
} else {
|
||||
TO_RGBA(out_color, r, g, b, a);
|
||||
}
|
||||
|
||||
OUTPUT(out_color, z);
|
||||
}
|
||||
|
@ -87,10 +106,10 @@ void shader_function(int task_id, kernel_arg_t* kernel_arg) {
|
|||
|
||||
int main() {
|
||||
kernel_arg_t* arg = (kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
int num_cores = vx_num_cores();
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
int total_threads = num_cores * num_warps * total_threads;
|
||||
vx_spawn_tasks(total_threads, (vx_spawn_tasks_cb)shader_function, arg);
|
||||
int total_threads = num_warps * total_threads;
|
||||
//vx_spawn_tasks(total_threads, (vx_spawn_tasks_cb)shader_function, arg);
|
||||
shader_function(0, arg);
|
||||
return 0;
|
||||
}
|
|
@ -9,7 +9,8 @@
|
|||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
#include "utils.h"
|
||||
#include "model_quad.h"
|
||||
#include "model_cube.h"
|
||||
#include "model_triangle.h"
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
|
@ -30,13 +31,15 @@ const char* input_file = "fire.png";
|
|||
const char* output_file = "output.png";
|
||||
const char* reference_file = nullptr;
|
||||
uint32_t clear_color = 0x00000000;
|
||||
uint32_t clear_depth = 0x00000000;
|
||||
bool tex_enabled = false;
|
||||
int tex_format = TEX_FORMAT_A8R8G8B8;
|
||||
ePixelFormat tex_eformat = FORMAT_A8R8G8B8;
|
||||
int tex_wrap = TEX_WRAP_CLAMP;
|
||||
int tex_filter = TEX_FILTER_POINT;
|
||||
uint32_t dst_width = 256;
|
||||
uint32_t dst_height = 256;
|
||||
const model_t& model = model_quad;
|
||||
uint32_t dst_width = 128;
|
||||
uint32_t dst_height = 128;
|
||||
const model_t& model = model_triangle;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
|
@ -107,7 +110,8 @@ void cleanup() {
|
|||
if (device) {
|
||||
vx_mem_free(device, tilebuf_addr);
|
||||
vx_mem_free(device, primbuf_addr);
|
||||
vx_mem_free(device, tbuf_addr);
|
||||
if (tex_enabled)
|
||||
vx_mem_free(device, tbuf_addr);
|
||||
vx_mem_free(device, zbuf_addr);
|
||||
vx_mem_free(device, cbuf_addr);
|
||||
vx_dev_close(device);
|
||||
|
@ -140,7 +144,12 @@ int render(uint32_t buf_addr, uint32_t buf_size, uint32_t width, uint32_t height
|
|||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, 4);
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
|
||||
{
|
||||
// the image is flipped
|
||||
auto pitch = width * 4;
|
||||
auto bits = dst_pixels.data() + (height-1) * pitch;
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, bits, width, height, -pitch));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -190,7 +199,9 @@ int main(int argc, char *argv[]) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
if (!model.texture.empty()) {
|
||||
tex_enabled = true;
|
||||
|
||||
std::vector<uint8_t> staging;
|
||||
RT_CHECK(LoadImage(input_file, tex_eformat, staging, &tex_width, &tex_height));
|
||||
// check power of two support
|
||||
|
@ -200,7 +211,7 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
uint32_t tex_bpp = Format::GetInfo(tex_eformat).BytePerPixel;
|
||||
uint32_t tex_pitch = tex_width * tex_bpp;
|
||||
RT_CHECK(GenerateMipmaps(texbuf, mip_offsets, staging, tex_eformat, tex_width, tex_height, tex_pitch));
|
||||
RT_CHECK(GenerateMipmaps(texbuf, mip_offsets, staging.data(), tex_eformat, tex_width, tex_height, tex_pitch));
|
||||
}
|
||||
|
||||
uint32_t primbuf_stride = sizeof(rast_prim_t);
|
||||
|
@ -228,7 +239,8 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "allocate device memory" << std::endl;
|
||||
RT_CHECK(vx_mem_alloc(device, tilebuf.size(), &tilebuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, primbuf.size(), &primbuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, texbuf.size(), &tbuf_addr));
|
||||
if (tex_enabled)
|
||||
RT_CHECK(vx_mem_alloc(device, texbuf.size(), &tbuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, zbuf_size, &zbuf_addr));
|
||||
RT_CHECK(vx_mem_alloc(device, cbuf_size, &cbuf_addr));
|
||||
|
||||
|
@ -241,19 +253,26 @@ int main(int argc, char *argv[]) {
|
|||
// allocate staging buffer
|
||||
std::cout << "allocate staging buffer" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>({
|
||||
sizeof(kernel_arg_t), (uint32_t)tilebuf.size(), (uint32_t)primbuf.size(), zbuf_size, cbuf_size
|
||||
sizeof(kernel_arg_t),
|
||||
(uint32_t)tilebuf.size(),
|
||||
(uint32_t)primbuf.size(),
|
||||
(uint32_t)texbuf.size(),
|
||||
zbuf_size,
|
||||
cbuf_size
|
||||
});
|
||||
RT_CHECK(vx_buf_alloc(device, alloc_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.tex_enabled= tex_enabled;
|
||||
kernel_arg.prim_addr = primbuf_addr;
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = cbuf_stride;
|
||||
kernel_arg.dst_pitch = cbuf_pitch;
|
||||
kernel_arg.dst_addr = cbuf_addr;
|
||||
|
||||
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
|
@ -280,7 +299,7 @@ int main(int argc, char *argv[]) {
|
|||
{
|
||||
auto buf_ptr = (uint32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < (cbuf_size/4); ++i) {
|
||||
buf_ptr[i] = clear_color;
|
||||
buf_ptr[i] = clear_depth;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_addr, cbuf_size, 0));
|
||||
}
|
||||
|
@ -296,16 +315,18 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
// configure texture units
|
||||
vx_dcr_write(device, DCR_TEX_STAGE, 0);
|
||||
vx_dcr_write(device, DCR_TEX_LOGDIM, (tex_logheight << 16) | tex_logwidth);
|
||||
vx_dcr_write(device, DCR_TEX_FORMAT, tex_format);
|
||||
vx_dcr_write(device, DCR_TEX_WRAP, (tex_wrap << 16) | tex_wrap);
|
||||
vx_dcr_write(device, DCR_TEX_FILTER, tex_filter);
|
||||
vx_dcr_write(device, DCR_TEX_ADDR, tbuf_addr);
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
vx_dcr_write(device, DCR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
};
|
||||
if (tex_enabled) {
|
||||
vx_dcr_write(device, DCR_TEX_STAGE, 0);
|
||||
vx_dcr_write(device, DCR_TEX_LOGDIM, (tex_logheight << 16) | tex_logwidth);
|
||||
vx_dcr_write(device, DCR_TEX_FORMAT, tex_format);
|
||||
vx_dcr_write(device, DCR_TEX_WRAP, (tex_wrap << 16) | tex_wrap);
|
||||
vx_dcr_write(device, DCR_TEX_FILTER, tex_filter);
|
||||
vx_dcr_write(device, DCR_TEX_ADDR, tbuf_addr);
|
||||
for (uint32_t i = 0; i < mip_offsets.size(); ++i) {
|
||||
assert(i < TEX_LOD_MAX);
|
||||
vx_dcr_write(device, DCR_TEX_MIPOFF(i), mip_offsets.at(i));
|
||||
};
|
||||
}
|
||||
|
||||
// configure raster units
|
||||
vx_dcr_write(device, DCR_RASTER_TBUF_ADDR, tilebuf_addr);
|
||||
|
@ -319,16 +340,22 @@ int main(int argc, char *argv[]) {
|
|||
vx_dcr_write(device, DCR_ROP_CBUF_ADDR, cbuf_addr);
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_PITCH, cbuf_pitch);
|
||||
vx_dcr_write(device, DCR_ROP_CBUF_MASK, 0xffffffff);
|
||||
|
||||
|
||||
// configure rop depth states
|
||||
vx_dcr_write(device, DCR_ROP_DEPTH_FUNC, ROP_DEPTH_FUNC_LESS);
|
||||
vx_dcr_write(device, DCR_ROP_DEPTH_MASK, 1);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_FUNC, ROP_DEPTH_FUNC_ALWAYS);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_ZPASS, ROP_STENCIL_OP_KEEP);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_ZPASS, ROP_STENCIL_OP_KEEP);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_FAIL, ROP_STENCIL_OP_KEEP);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_MASK, 0xff);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_REF, 0);
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_FUNC, (ROP_DEPTH_FUNC_ALWAYS << 16) // back
|
||||
| (ROP_DEPTH_FUNC_ALWAYS << 0)); // front
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_ZPASS, (ROP_STENCIL_OP_KEEP << 16) // back
|
||||
| (ROP_STENCIL_OP_KEEP << 0)); // front
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_ZPASS, (ROP_STENCIL_OP_KEEP << 16) // back
|
||||
| (ROP_STENCIL_OP_KEEP << 0)); // front
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_FAIL, (ROP_STENCIL_OP_KEEP << 16) // back
|
||||
| (ROP_STENCIL_OP_KEEP << 0)); // front
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_MASK, (0xff << 16) // back
|
||||
| (0xff << 0)); // front
|
||||
vx_dcr_write(device, DCR_ROP_STENCIL_REF, (0 << 16) // back
|
||||
| (0 << 0)); // front
|
||||
|
||||
// configure rop blend stats
|
||||
vx_dcr_write(device, DCR_ROP_BLEND_MODE, (ROP_BLEND_MODE_ADD << 16) // DST
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "common.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
typedef struct {
|
||||
float x;
|
||||
|
@ -22,4 +23,5 @@ typedef struct {
|
|||
struct model_t {
|
||||
std::vector<vertex_t> vertives;
|
||||
std::vector<primitive_t> primitives;
|
||||
std::string texture;
|
||||
};
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include "model.h"
|
||||
|
||||
const model_t model_quad = {
|
||||
const model_t model_cube = {
|
||||
{
|
||||
{-6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 0.000000, 0.000000},
|
||||
{6.337301, 0.000000, 24.177938, 24.949747, 0xffffffff, 1.000000, 0.000000},
|
||||
|
@ -51,5 +51,6 @@ const model_t model_quad = {
|
|||
{7, 0, 4},
|
||||
{5, 4, 1},
|
||||
{1, 4, 0}
|
||||
}
|
||||
},
|
||||
"fire.png"
|
||||
};
|
14
tests/regression/draw3d/model_triangle.h
Normal file
14
tests/regression/draw3d/model_triangle.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "model.h"
|
||||
|
||||
const model_t model_triangle = {
|
||||
{
|
||||
{-0.5f,-0.5f, 0.0f, 1.0, 0xff0000ff, 0.000000, 0.000000},
|
||||
{ 0.5f,-0.5f, 0.0f, 1.0, 0xff00ff00, 0.000000, 0.000000},
|
||||
{ 0.0f, 0.5f, 0.0f, 1.0, 0xffff0000, 0.000000, 0.000000}
|
||||
}, {
|
||||
{0, 1, 2},
|
||||
},
|
||||
""
|
||||
};
|
|
@ -4,9 +4,11 @@
|
|||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <cocogfx/include/tga.hpp>
|
||||
#include <cocogfx/include/png.hpp>
|
||||
#include <cocogfx/include/bmp.hpp>
|
||||
#include <cocogfx/include/fixed.hpp>
|
||||
#include <cocogfx/include/math.hpp>
|
||||
|
||||
|
@ -92,7 +94,7 @@ uint32_t Binning(std::vector<uint8_t>& tilebuf,
|
|||
|
||||
uint32_t tileLogSize = log2ceil(tileSize);
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint32_t>> tiles;
|
||||
std::map<uint32_t, std::vector<uint32_t>> tiles;
|
||||
|
||||
std::vector<rast_prim_t> rast_prims;
|
||||
rast_prims.reserve(model.primitives.size());
|
||||
|
@ -150,9 +152,9 @@ uint32_t Binning(std::vector<uint8_t>& tilebuf,
|
|||
|
||||
{
|
||||
#define ATTRIBUTE_DELTA(dx, x0, x1, x2) \
|
||||
dx.x = fixed23_t(x0 - x2); \
|
||||
dx.y = fixed23_t(x1 - x2); \
|
||||
dx.z = fixed23_t(x2)
|
||||
dx.x = fixed24_t(x0 - x2); \
|
||||
dx.y = fixed24_t(x1 - x2); \
|
||||
dx.z = fixed24_t(x2)
|
||||
|
||||
rast_prim_t rast_prim;
|
||||
rast_prim.edges[0] = edges[0];
|
||||
|
@ -211,6 +213,9 @@ uint32_t Binning(std::vector<uint8_t>& tilebuf,
|
|||
| (ee1 + (extents[1] << tileLogSize)).data()
|
||||
| (ee2 + (extents[2] << tileLogSize)).data()) >= 0) {
|
||||
// assign primitive to tile
|
||||
//auto x = tx << tileLogSize;
|
||||
//auto y = ty << tileLogSize;
|
||||
//printf("*** Tile (%d,%d) :\n", x, y);
|
||||
uint32_t tile_id = (ty << 16) | tx;
|
||||
tiles[tile_id].push_back(p);
|
||||
++num_prims;
|
||||
|
@ -285,6 +290,11 @@ int LoadImage(const char *filename,
|
|||
int ret = LoadPNG(filename, pixels, &img_width, &img_height, &img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else
|
||||
if (iequals(ext, "bmp")) {
|
||||
int ret = LoadBMP(filename, pixels, &img_width, &img_height, &img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
std::cerr << "invalid file extension: " << ext << "!" << std::endl;
|
||||
return -1;
|
||||
|
@ -311,7 +321,7 @@ int LoadImage(const char *filename,
|
|||
if (img_format != format) {
|
||||
// format conversion to RGBA
|
||||
std::vector<uint8_t> staging;
|
||||
int ret = ConvertImage(staging, format, pixels, img_format, img_width, img_height, img_width * img_bpp);
|
||||
int ret = ConvertImage(staging, format, pixels.data(), img_format, img_width, img_height, img_width * img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
pixels.swap(staging);
|
||||
|
@ -325,16 +335,20 @@ int LoadImage(const char *filename,
|
|||
|
||||
int SaveImage(const char *filename,
|
||||
ePixelFormat format,
|
||||
const std::vector<uint8_t> &pixels,
|
||||
const uint8_t* pixels,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
uint32_t height,
|
||||
int32_t pitch) {
|
||||
uint32_t bpp = Format::GetInfo(format).BytePerPixel;
|
||||
auto ext = getFileExt(filename);
|
||||
if (iequals(ext, "tga")) {
|
||||
return SaveTGA(filename, pixels, width, height, bpp);
|
||||
return SaveTGA(filename, pixels, width, height, bpp, pitch);
|
||||
} else
|
||||
if (iequals(ext, "png")) {
|
||||
return SavePNG(filename, pixels, width, height, bpp);
|
||||
return SavePNG(filename, pixels, width, height, bpp, pitch);
|
||||
} else
|
||||
if (iequals(ext, "bmp")) {
|
||||
return SaveBMP(filename, pixels, width, height, bpp, pitch);
|
||||
} else {
|
||||
std::cerr << "invalid file extension: " << ext << "!" << std::endl;
|
||||
return -1;
|
||||
|
|
|
@ -20,9 +20,10 @@ int LoadImage(const char *filename,
|
|||
|
||||
int SaveImage(const char *filename,
|
||||
cocogfx::ePixelFormat format,
|
||||
const std::vector<uint8_t> &pixels,
|
||||
const uint8_t* pixels,
|
||||
uint32_t width,
|
||||
uint32_t height);
|
||||
uint32_t height,
|
||||
int32_t pitch);
|
||||
|
||||
void dump_image(const std::vector<uint8_t>& pixels,
|
||||
uint32_t width,
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -DSM_ENABLE=0 -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -11,7 +11,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
|||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -std=c++11 -march=rv32imf -mabi=ilp32f -O3 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -DENABLE_SW -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
@ -22,7 +22,7 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
|||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw -I$(VORTEX_RT_PATH)/../sim/common -I$(VORTEX_RT_PATH)/../third_party
|
||||
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex $(VORTEX_RT_PATH)/../third_party/cocogfx/libcocogfx.a -lz
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex $(VORTEX_RT_PATH)/../third_party/cocogfx/libcocogfx.a -lpng -lz
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -140,7 +140,7 @@ int render(const kernel_arg_t& kernel_arg,
|
|||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, 4);
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels, width, height));
|
||||
RT_CHECK(SaveImage(output_file, FORMAT_A8R8G8B8, dst_pixels.data(), width, height, width * 4));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ int main(int argc, char *argv[]) {
|
|||
uint32_t src_bpp = Format::GetInfo(eformat).BytePerPixel;
|
||||
uint32_t src_pitch = src_width * src_bpp;
|
||||
//dump_image(staging, src_width, src_height, src_bpp);
|
||||
RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging, eformat, src_width, src_height, src_pitch));
|
||||
RT_CHECK(GenerateMipmaps(src_pixels, mip_offsets, staging.data(), eformat, src_width, src_height, src_pitch));
|
||||
}
|
||||
|
||||
uint32_t src_logwidth = log2ceil(src_width);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <iomanip>
|
||||
#include <cocogfx/include/tga.hpp>
|
||||
#include <cocogfx/include/png.hpp>
|
||||
#include <cocogfx/include/bmp.hpp>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
|
@ -46,6 +47,11 @@ int LoadImage(const char *filename,
|
|||
int ret = LoadPNG(filename, pixels, &img_width, &img_height, &img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else
|
||||
if (iequals(ext, "bmp")) {
|
||||
int ret = LoadBMP(filename, pixels, &img_width, &img_height, &img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
std::cerr << "invalid file extension: " << ext << "!" << std::endl;
|
||||
return -1;
|
||||
|
@ -72,7 +78,7 @@ int LoadImage(const char *filename,
|
|||
if (img_format != format) {
|
||||
// format conversion to RGBA
|
||||
std::vector<uint8_t> staging;
|
||||
int ret = ConvertImage(staging, format, pixels, img_format, img_width, img_height, img_width * img_bpp);
|
||||
int ret = ConvertImage(staging, format, pixels.data(), img_format, img_width, img_height, img_width * img_bpp);
|
||||
if (ret)
|
||||
return ret;
|
||||
pixels.swap(staging);
|
||||
|
@ -86,16 +92,20 @@ int LoadImage(const char *filename,
|
|||
|
||||
int SaveImage(const char *filename,
|
||||
ePixelFormat format,
|
||||
const std::vector<uint8_t> &pixels,
|
||||
const uint8_t* pixels,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
uint32_t height,
|
||||
int32_t pitch) {
|
||||
auto bpp = Format::GetInfo(format).BytePerPixel;
|
||||
auto ext = getFileExt(filename);
|
||||
if (iequals(ext, "tga")) {
|
||||
return SaveTGA(filename, pixels, width, height, bpp);
|
||||
return SaveTGA(filename, pixels, width, height, bpp, pitch);
|
||||
} else
|
||||
if (iequals(ext, "png")) {
|
||||
return SavePNG(filename, pixels, width, height, bpp);
|
||||
return SavePNG(filename, pixels, width, height, bpp, pitch);
|
||||
} else
|
||||
if (iequals(ext, "bmp")) {
|
||||
return SaveBMP(filename, pixels, width, height, bpp, pitch);
|
||||
} else {
|
||||
std::cerr << "invalid file extension: " << ext << "!" << std::endl;
|
||||
return -1;
|
||||
|
|
|
@ -12,9 +12,10 @@ int LoadImage(const char *filename,
|
|||
|
||||
int SaveImage(const char *filename,
|
||||
cocogfx::ePixelFormat format,
|
||||
const std::vector<uint8_t> &pixels,
|
||||
const uint8_t* pixels,
|
||||
uint32_t width,
|
||||
uint32_t height);
|
||||
uint32_t height,
|
||||
int32_t pitch);
|
||||
|
||||
void dump_image(const std::vector<uint8_t>& pixels,
|
||||
uint32_t width,
|
||||
|
|
|
@ -22,7 +22,7 @@ else
|
|||
CFLAGS += -march=rv64imfd -mabi=lp64d
|
||||
endif
|
||||
|
||||
CFLAGS += -O3 -Wstack-usage=1024 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -O3 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -22,7 +22,7 @@ else
|
|||
CFLAGS += -march=rv64imfd -mabi=lp64d
|
||||
endif
|
||||
|
||||
CFLAGS += -O3 -Wstack-usage=1024 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -O3 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
|
@ -22,7 +22,7 @@ else
|
|||
CFLAGS += -march=rv64imfd -mabi=lp64d
|
||||
endif
|
||||
|
||||
CFLAGS += -O3 -Wstack-usage=1024 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -O3 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||
|
|
2
third_party/cocogfx
vendored
2
third_party/cocogfx
vendored
|
@ -1 +1 @@
|
|||
Subproject commit 6ff9739cee9a0528142123985e4d8e59f7d0a4e8
|
||||
Subproject commit eb441b788174e9bbc09c7207319e1bf87fc6b16f
|
Loading…
Add table
Add a link
Reference in a new issue