mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
raster unit refactoring
This commit is contained in:
parent
55eb8562a7
commit
e11bc9d0fb
18 changed files with 271 additions and 194 deletions
|
@ -145,6 +145,9 @@ export PERF_CLASS=$PERF_CLASS
|
|||
|
||||
status=0
|
||||
|
||||
# ensure config update
|
||||
make -C hw config
|
||||
|
||||
# ensure the stub driver is present
|
||||
make -C $VORTEX_HOME/driver/stub
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
RTL_DIR=./rtl
|
||||
SCRIPT_DIR=./scripts
|
||||
|
||||
all: VX_config.h VX_types.h
|
||||
all: config
|
||||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
|
|
@ -68,7 +68,6 @@ module VX_cluster #(
|
|||
.TILE_LOGSIZE (`RASTER_TILE_LOGSIZE),
|
||||
.BLOCK_LOGSIZE (`RASTER_BLOCK_LOGSIZE),
|
||||
.MEM_FIFO_DEPTH (`RASTER_MEM_FIFO_DEPTH),
|
||||
.TILE_FIFO_DEPTH (`RASTER_TILE_FIFO_DEPTH),
|
||||
.QUAD_FIFO_DEPTH (`RASTER_QUAD_FIFO_DEPTH),
|
||||
.OUTPUT_QUADS (`NUM_THREADS)
|
||||
) raster_unit (
|
||||
|
|
|
@ -258,6 +258,10 @@
|
|||
|
||||
`define RESET_DELAY 6
|
||||
|
||||
`ifndef STALL_TIMEOUT
|
||||
`define STALL_TIMEOUT (10000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
|
||||
`endif
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of Instruction Buffer
|
||||
|
@ -292,7 +296,7 @@
|
|||
|
||||
// RASTER tile size
|
||||
`ifndef RASTER_TILE_LOGSIZE
|
||||
`define RASTER_TILE_LOGSIZE 4
|
||||
`define RASTER_TILE_LOGSIZE 5
|
||||
`endif
|
||||
|
||||
// RASTER block size
|
||||
|
@ -305,19 +309,14 @@
|
|||
`define RASTER_MEM_FIFO_DEPTH 8
|
||||
`endif
|
||||
|
||||
// RASTER tile queue size
|
||||
`ifndef RASTER_TILE_FIFO_DEPTH
|
||||
`define RASTER_TILE_FIFO_DEPTH (1 << (2 * (`RASTER_TILE_LOGSIZE - `RASTER_BLOCK_LOGSIZE)))
|
||||
// ROP memory pending size
|
||||
`ifndef ROP_MEM_PENDING_SIZE
|
||||
`define ROP_MEM_PENDING_SIZE 4
|
||||
`endif
|
||||
|
||||
// RASTER quad queue size
|
||||
`ifndef RASTER_QUAD_FIFO_DEPTH
|
||||
`define RASTER_QUAD_FIFO_DEPTH 16
|
||||
`endif
|
||||
|
||||
// ROP memory pending size
|
||||
`ifndef ROP_MEM_PENDING_SIZE
|
||||
`define ROP_MEM_PENDING_SIZE 4
|
||||
`define RASTER_QUAD_FIFO_DEPTH 8
|
||||
`endif
|
||||
|
||||
// ROP number of slices
|
||||
|
|
|
@ -18,7 +18,6 @@ module VX_icache_stage #(
|
|||
// response
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
|
@ -50,7 +49,7 @@ module VX_icache_stage #(
|
|||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This will resolve potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_reads_full;
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE + 1)
|
||||
|
@ -59,7 +58,7 @@ module VX_icache_stage #(
|
|||
.reset (reset),
|
||||
.incr (icache_req_fire && (ifetch_req_if.wid == `NW_BITS'(i))),
|
||||
.decr (ifetch_rsp_if.ibuf_pop[i]),
|
||||
.full (pending_reads_full[i]),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
@ -69,7 +68,7 @@ module VX_icache_stage #(
|
|||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.uuid))
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.valid = ifetch_req_if.valid && ~pending_reads_full[ifetch_req_if.wid];
|
||||
assign icache_req_if.valid = ifetch_req_if.valid && ~pending_ibuf_full[ifetch_req_if.wid];
|
||||
assign icache_req_if.rw = 0;
|
||||
assign icache_req_if.byteen = '0;
|
||||
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
|
||||
|
@ -77,7 +76,7 @@ module VX_icache_stage #(
|
|||
assign icache_req_if.tag = {ifetch_req_if.uuid, req_tag};
|
||||
|
||||
// Can accept new request?
|
||||
assign ifetch_req_if.ready = icache_req_if.ready && ~pending_reads_full[ifetch_req_if.wid];
|
||||
assign ifetch_req_if.ready = icache_req_if.ready && ~pending_ibuf_full[ifetch_req_if.wid];
|
||||
|
||||
wire [`NW_BITS-1:0] rsp_wid = rsp_tag;
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ module VX_issue #(
|
|||
`endif
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
|
||||
VX_ibuffer_if ibuffer_if();
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
|
@ -30,6 +31,8 @@ module VX_issue #(
|
|||
VX_scoreboard_if scoreboard_if();
|
||||
VX_dispatch_if dispatch_if();
|
||||
|
||||
wire [3:0] in_use_regs;
|
||||
|
||||
// GPR request interface
|
||||
assign gpr_req_if.wid = ibuffer_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuffer_if.rs1;
|
||||
|
@ -99,7 +102,8 @@ module VX_issue #(
|
|||
.clk (clk),
|
||||
.reset (scoreboard_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.in_use_regs (in_use_regs)
|
||||
);
|
||||
|
||||
VX_gpr_stage #(
|
||||
|
@ -125,6 +129,28 @@ module VX_issue #(
|
|||
`endif
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
reg [31:0] timeout_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= 0;
|
||||
end else begin
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
dpi_trace(3, "%d: *** core%0d-stall: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, cycles=%0d, inuse=%b%b%b%b, dispatch=%b (#%0d)\n",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.rd, ibuffer_if.wb, timeout_ctr,
|
||||
in_use_regs[0], in_use_regs[1], in_use_regs[2], in_use_regs[3], ~dispatch_if.ready, ibuffer_if.uuid);
|
||||
`endif
|
||||
`ASSERT(timeout_ctr < `STALL_TIMEOUT,
|
||||
("%t: *** core%0d-issue-timeout: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, inuse=%b%b%b%b, dispatch=%b (#%0d)",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.tmask, ibuffer_if.rd, ibuffer_if.wb,
|
||||
in_use_regs[0], in_use_regs[1], in_use_regs[2], in_use_regs[3], ~dispatch_if.ready, ibuffer_if.uuid));
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
timeout_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
|
||||
`SCOPE_ASSIGN (issue_uuid, ibuffer_if.uuid);
|
||||
|
|
|
@ -3,11 +3,12 @@
|
|||
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_writeback_if.slave writeback_if
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
output wire [3:0] in_use_regs
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
|
@ -45,40 +46,25 @@ module VX_scoreboard #(
|
|||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
assign scoreboard_if.ready = ~(deq_inuse_rd
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
|
||||
`UNUSED_VAR (writeback_if.PC)
|
||||
`UNUSED_VAR (scoreboard_if.PC)
|
||||
`UNUSED_VAR (scoreboard_if.tmask)
|
||||
`UNUSED_VAR (scoreboard_if.uuid)
|
||||
|
||||
reg [31:0] deadlock_ctr;
|
||||
wire [31:0] deadlock_timeout = 10000 * (1 ** (`L2_ENABLED + `L3_ENABLED));
|
||||
assign in_use_regs[0] = deq_inuse_rd;
|
||||
assign in_use_regs[1] = deq_inuse_rs1;
|
||||
assign in_use_regs[2] = deq_inuse_rs2;
|
||||
assign in_use_regs[3] = deq_inuse_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
deadlock_ctr <= 0;
|
||||
end else begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
if (scoreboard_if.valid && ~scoreboard_if.ready) begin
|
||||
dpi_trace(3, "%d: *** core%0d-stall: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)\n",
|
||||
$time, CORE_ID, scoreboard_if.wid, scoreboard_if.PC, scoreboard_if.tmask, scoreboard_if.rd, scoreboard_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, scoreboard_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.uuid));
|
||||
end
|
||||
if (scoreboard_if.valid && ~scoreboard_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
`ASSERT(deadlock_ctr < deadlock_timeout,
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)",
|
||||
$time, CORE_ID, scoreboard_if.wid, scoreboard_if.PC, scoreboard_if.tmask, scoreboard_if.rd, scoreboard_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, scoreboard_if.uuid));
|
||||
end else if (scoreboard_if.valid && scoreboard_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (release_reg) begin
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.uuid));
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -245,12 +245,27 @@ module VX_warp_sched #(
|
|||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.enable (~stall_out),
|
||||
.data_in ({schedule_valid, instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
reg [31:0] timeout_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= 0;
|
||||
end else begin
|
||||
if (active_warps !=0 && active_warps == stalled_warps) begin
|
||||
`ASSERT(timeout_ctr < `STALL_TIMEOUT,
|
||||
("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps));
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (active_warps == 0 || active_warps != stalled_warps) begin
|
||||
timeout_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (wsched_scheduled, warp_scheduled);
|
||||
`SCOPE_ASSIGN (wsched_schedule_uuid, instr_uuid);
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
`include "VX_raster_define.vh"
|
||||
|
||||
module VX_raster_be #(
|
||||
parameter SLICE_ID = 1,
|
||||
parameter BLOCK_LOGSIZE = 6,
|
||||
parameter SLICE_ID = 0,
|
||||
parameter BLOCK_LOGSIZE = 5,
|
||||
parameter OUTPUT_QUADS = 2,
|
||||
parameter QUAD_FIFO_DEPTH = 16
|
||||
parameter QUAD_FIFO_DEPTH = 4
|
||||
) (
|
||||
// Standard inputs
|
||||
input wire clk,
|
||||
|
|
|
@ -15,21 +15,23 @@ module VX_raster_edge_function #(
|
|||
|
||||
output wire [2:0][`RASTER_DATA_BITS-1:0] result
|
||||
);
|
||||
localparam PROD_WIDTH = `RASTER_DATA_BITS + `RASTER_DIM_BITS;
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`STATIC_ASSERT((LATENCY >= `LATENCY_IMUL), ("invalid parameter"))
|
||||
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] prod_x;
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] prod_y;
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] edge_c;
|
||||
wire [2:0][PROD_WIDTH-1:0] prod_x;
|
||||
wire [2:0][PROD_WIDTH-1:0] prod_y;
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] edge_c, edge_c_s;
|
||||
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] edge_c_s, result_s;
|
||||
wire [2:0][`RASTER_DATA_BITS-1:0] result_s;
|
||||
|
||||
for (genvar i = 0; i < 3; ++i) begin
|
||||
VX_multiplier #(
|
||||
.WIDTHA (`RASTER_DATA_BITS),
|
||||
.WIDTHB (`RASTER_DIM_BITS),
|
||||
.WIDTHP (`RASTER_DATA_BITS),
|
||||
.WIDTHP (PROD_WIDTH),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) x_multiplier (
|
||||
|
@ -43,7 +45,7 @@ module VX_raster_edge_function #(
|
|||
VX_multiplier #(
|
||||
.WIDTHA (`RASTER_DATA_BITS),
|
||||
.WIDTHB (`RASTER_DIM_BITS),
|
||||
.WIDTHP (`RASTER_DATA_BITS),
|
||||
.WIDTHP (PROD_WIDTH),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) y_multiplier (
|
||||
|
@ -69,7 +71,9 @@ module VX_raster_edge_function #(
|
|||
);
|
||||
|
||||
for (genvar i = 0; i < 3; ++i) begin
|
||||
assign result_s[i] = prod_x[i] + prod_y[i] + edge_c_s[i];
|
||||
wire [PROD_WIDTH-1:0] sum = prod_x[i] + prod_y[i] + PROD_WIDTH'(edge_c_s[i]);
|
||||
`UNUSED_VAR (sum)
|
||||
assign result_s[i] = sum[`RASTER_DATA_BITS-1:0];
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
`include "VX_raster_define.vh"
|
||||
|
||||
module VX_raster_extents #(
|
||||
parameter TILE_LOGSIZE = 64
|
||||
parameter TILE_LOGSIZE = 5
|
||||
) (
|
||||
input wire signed [2:0][2:0][`RASTER_DATA_BITS-1:0] edges,
|
||||
output wire signed [2:0][`RASTER_DATA_BITS-1:0] extents
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
// 3. Store primitive data in an elastic buffer
|
||||
|
||||
module VX_raster_mem #(
|
||||
parameter TILE_LOGSIZE = 16,
|
||||
parameter TILE_LOGSIZE = 5,
|
||||
parameter QUEUE_SIZE = 8
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -34,61 +34,69 @@ module VX_raster_mem #(
|
|||
);
|
||||
`UNUSED_VAR (dcrs)
|
||||
|
||||
localparam MUL_LATENCY = 3;
|
||||
localparam NUM_REQS = `RASTER_MEM_REQS;
|
||||
localparam FSM_BITS = 2;
|
||||
localparam TAG_WIDTH = `RASTER_PID_BITS;
|
||||
localparam MUL_LATENCY = 3;
|
||||
localparam NUM_REQS = `RASTER_MEM_REQS;
|
||||
localparam FSM_BITS = 2;
|
||||
localparam FETCH_FLAG_BITS = 2;
|
||||
localparam TAG_WIDTH = `RASTER_PID_BITS + FETCH_FLAG_BITS;
|
||||
|
||||
localparam STATE_IDLE = 2'b00;
|
||||
localparam STATE_TILE = 2'b01;
|
||||
localparam STATE_PRIM = 2'b10;
|
||||
localparam STATE_IDLE = 2'b00;
|
||||
localparam STATE_TILE = 2'b01;
|
||||
localparam STATE_PRIM = 2'b10;
|
||||
|
||||
localparam TILE_FETCH_MASK = 9'(2'b11);
|
||||
localparam PID_FETCH_MASK = 9'(1'b01);
|
||||
localparam PDATA_FETCH_MASK = {9{1'b1}};
|
||||
localparam FETCH_FLAG_TILE = 2'b00;
|
||||
localparam FETCH_FLAG_PID = 2'b01;
|
||||
localparam FETCH_FLAG_PDATA = 2'b10;
|
||||
|
||||
// A primitive data contains (x_loc, y_loc, pid, edges)
|
||||
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS + `RASTER_PID_BITS + 9 * `RASTER_DATA_BITS;
|
||||
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS+ 9 * `RASTER_DATA_BITS + `RASTER_PID_BITS ;
|
||||
|
||||
// Storage to cycle through all primitives and tiles
|
||||
reg [`RASTER_DCR_DATA_BITS-1:0] curr_tbuf_addr;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_num_prims;
|
||||
reg [`RASTER_PID_BITS-1:0] rem_num_prims;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_reqs;
|
||||
reg [`RASTER_PID_BITS-1:0] curr_pid_rsps;
|
||||
reg [`RASTER_TILE_BITS-1:0] curr_num_tiles;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_x_loc;
|
||||
reg [`RASTER_DIM_BITS-1:0] curr_y_loc;
|
||||
|
||||
// Output buffer
|
||||
wire buf_out_valid;
|
||||
wire buf_out_ready;
|
||||
wire buf_in_valid;
|
||||
wire buf_in_ready;
|
||||
|
||||
// Memory request
|
||||
reg mem_req_valid;
|
||||
reg mem_req_valid, mem_req_valid_qual;
|
||||
reg [NUM_REQS-1:0] mem_req_mask;
|
||||
reg [8:0][`RASTER_DCR_DATA_BITS-1:0] mem_req_addr;
|
||||
reg [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid;
|
||||
reg [NUM_REQS-1:0] mem_rsp_mask;
|
||||
wire mem_rsp_valid;
|
||||
wire [8:0][`RASTER_DATA_BITS-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
|
||||
// Primitive info
|
||||
wire prim_id_rsp_valid;
|
||||
wire prim_data_rsp_valid;
|
||||
wire prim_addr_rsp_valid;
|
||||
wire prim_addr_rsp_ready;
|
||||
wire [8:0][`RASTER_DATA_BITS-1:0] prim_mem_addr;
|
||||
wire [`RASTER_PID_BITS-1:0] prim_id;
|
||||
wire [`RASTER_PID_BITS-1:0] primitive_id;
|
||||
|
||||
// Memory fetch FSM
|
||||
|
||||
reg [FSM_BITS-1:0] state;
|
||||
|
||||
wire is_prim_id_req = (mem_req_tag[FETCH_FLAG_BITS-1:0] == FETCH_FLAG_PID);
|
||||
wire is_prim_id_rsp = (mem_rsp_tag[FETCH_FLAG_BITS-1:0] == FETCH_FLAG_PID);
|
||||
|
||||
wire fsm_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire is_prim_data_req = (mem_req_tag[FETCH_FLAG_BITS-1:0] == FETCH_FLAG_PDATA);
|
||||
wire is_prim_data_rsp = (mem_rsp_tag[FETCH_FLAG_BITS-1:0] == FETCH_FLAG_PDATA);
|
||||
|
||||
wire prim_data_rsp_valid = mem_rsp_valid
|
||||
&& (state == STATE_PRIM)
|
||||
&& mem_rsp_mask[1];
|
||||
wire mem_req_fire = mem_req_valid_qual && mem_req_ready;
|
||||
|
||||
wire prim_addr_rsp_fire = prim_addr_rsp_valid && prim_addr_rsp_ready;
|
||||
|
||||
wire prim_data_rsp_fire = prim_data_rsp_valid && mem_rsp_ready;
|
||||
|
||||
|
@ -97,26 +105,28 @@ module VX_raster_mem #(
|
|||
state <= STATE_IDLE;
|
||||
mem_req_valid <= 0;
|
||||
curr_tbuf_addr <= 0;
|
||||
curr_num_prims <= 0;
|
||||
rem_num_prims <= 0;
|
||||
curr_pid_reqs <= 0;
|
||||
curr_pid_rsps <= 0;
|
||||
curr_num_tiles <= 0;
|
||||
end begin
|
||||
// deassert valid when request is sent
|
||||
if (fsm_req_fire) begin
|
||||
// deassert memory request when fired
|
||||
if (mem_req_fire) begin
|
||||
mem_req_valid <= 0;
|
||||
end
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (start && (dcrs.tile_count != 0)) begin
|
||||
// fetch the next tile header
|
||||
state <= STATE_TILE;
|
||||
mem_req_valid <= 1;
|
||||
curr_num_tiles <= dcrs.tile_count;
|
||||
mem_req_addr[0] <= dcrs.tbuf_addr;
|
||||
mem_req_addr[1] <= dcrs.tbuf_addr + 4;
|
||||
mem_req_mask <= TILE_FETCH_MASK;
|
||||
mem_req_tag <= 'x;
|
||||
mem_req_mask <= 9'b11;
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
|
||||
// set tile counters
|
||||
curr_tbuf_addr <= dcrs.tbuf_addr + 4 + 4;
|
||||
curr_num_tiles <= dcrs.tile_count;
|
||||
end
|
||||
end
|
||||
STATE_TILE: begin
|
||||
|
@ -125,54 +135,64 @@ module VX_raster_mem #(
|
|||
state <= STATE_PRIM;
|
||||
curr_x_loc <= `RASTER_DIM_BITS'(mem_rsp_data[0][0 +: 16] << TILE_LOGSIZE);
|
||||
curr_y_loc <= `RASTER_DIM_BITS'(mem_rsp_data[0][16 +: 16] << TILE_LOGSIZE);
|
||||
// send next primitive address
|
||||
// fetch next primitive pid
|
||||
mem_req_valid <= 1;
|
||||
mem_req_addr[0] <= curr_tbuf_addr;
|
||||
mem_req_mask <= PID_FETCH_MASK;
|
||||
mem_req_tag <= 'x;
|
||||
curr_tbuf_addr <= curr_tbuf_addr + 4;
|
||||
curr_num_prims <= mem_rsp_data[1][`RASTER_PID_BITS-1:0];
|
||||
rem_num_prims <= mem_rsp_data[1][`RASTER_PID_BITS-1:0];
|
||||
mem_req_mask <= 9'b1;
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_PID);
|
||||
// set primitive counters
|
||||
curr_pid_reqs <= mem_rsp_data[1][`RASTER_PID_BITS-1:0];
|
||||
curr_pid_rsps <= mem_rsp_data[1][`RASTER_PID_BITS-1:0];
|
||||
end
|
||||
end
|
||||
STATE_PRIM: begin
|
||||
if (prim_addr_rsp_valid) begin
|
||||
// handle primitive address response
|
||||
mem_req_valid <= 1;
|
||||
// handle memory submissions
|
||||
if (mem_req_fire) begin
|
||||
if (is_prim_id_req) begin
|
||||
// update pid counters
|
||||
curr_tbuf_addr <= curr_tbuf_addr + 4;
|
||||
curr_pid_reqs <= curr_pid_reqs - `RASTER_PID_BITS'(1);
|
||||
end
|
||||
|
||||
if ((curr_pid_reqs > 1)
|
||||
|| (curr_pid_reqs == 1 && ~is_prim_id_req)) begin
|
||||
// fetch next primitive pid
|
||||
mem_req_valid <= 1;
|
||||
mem_req_mask <= 9'b1;
|
||||
mem_req_addr[0] <= curr_tbuf_addr + (is_prim_id_req ? 4 : 0);
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_PID);
|
||||
end
|
||||
end
|
||||
|
||||
// handle primitive address response
|
||||
if (prim_addr_rsp_fire) begin
|
||||
mem_req_valid <= 1;
|
||||
mem_req_mask <= 9'b111111111;
|
||||
mem_req_addr <= prim_mem_addr;
|
||||
mem_req_mask <= PDATA_FETCH_MASK;
|
||||
mem_req_tag <= prim_id;
|
||||
end else
|
||||
mem_req_tag <= TAG_WIDTH'({primitive_id, FETCH_FLAG_PDATA});
|
||||
end
|
||||
|
||||
// handle primitive data response
|
||||
if (prim_data_rsp_fire) begin
|
||||
// handle primitive data response
|
||||
if (rem_num_prims == 1) begin
|
||||
if (curr_num_tiles != 1) begin
|
||||
// Fetch the next tile
|
||||
state <= STATE_TILE;
|
||||
mem_req_valid <= 1;
|
||||
mem_req_addr[0] <= curr_tbuf_addr;
|
||||
mem_req_addr[1] <= curr_tbuf_addr + 4;
|
||||
mem_req_mask <= TILE_FETCH_MASK;
|
||||
mem_req_tag <= 'x;
|
||||
curr_tbuf_addr <= curr_tbuf_addr + 4 + 4;
|
||||
curr_num_tiles <= curr_num_tiles - `RASTER_TILE_BITS'(1);
|
||||
end else begin
|
||||
if (curr_pid_rsps == 1) begin
|
||||
if (curr_num_tiles == 1) begin
|
||||
// done, return to idle
|
||||
state <= STATE_IDLE;
|
||||
end else begin
|
||||
// fetch the next tile header
|
||||
state <= STATE_TILE;
|
||||
mem_req_valid <= 1;
|
||||
mem_req_mask <= 9'b11;
|
||||
mem_req_addr[0] <= curr_tbuf_addr;
|
||||
mem_req_addr[1] <= curr_tbuf_addr + 4;
|
||||
mem_req_tag <= TAG_WIDTH'(FETCH_FLAG_TILE);
|
||||
curr_tbuf_addr <= curr_tbuf_addr + 4 + 4;
|
||||
end
|
||||
// update tile counter
|
||||
curr_num_tiles <= curr_num_tiles - `RASTER_TILE_BITS'(1);
|
||||
end
|
||||
rem_num_prims <= rem_num_prims - `RASTER_PID_BITS'(1);
|
||||
end else
|
||||
if (fsm_req_fire) begin
|
||||
// send next primitive address
|
||||
if (curr_num_prims != 1) begin
|
||||
mem_req_valid <= 1;
|
||||
mem_req_addr[0] <= curr_tbuf_addr;
|
||||
mem_req_mask <= PID_FETCH_MASK;
|
||||
mem_req_tag <= 'x;
|
||||
curr_tbuf_addr <= curr_tbuf_addr + 4;
|
||||
curr_num_prims <= curr_num_prims - `RASTER_PID_BITS'(1);
|
||||
end
|
||||
// update pid counter
|
||||
curr_pid_rsps <= curr_pid_rsps - `RASTER_PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
default:;
|
||||
|
@ -182,9 +202,37 @@ module VX_raster_mem #(
|
|||
|
||||
// Memory streamer
|
||||
|
||||
// stall the memory response only if edge data cannot be taken
|
||||
assign mem_rsp_ready = (~prim_data_rsp_valid || buf_out_ready)
|
||||
&& ~prim_addr_rsp_valid;
|
||||
// ensure that we have space in the output buffer to prevent memory deadlock
|
||||
wire pending_output_full;
|
||||
VX_pending_size #(
|
||||
.SIZE (QUEUE_SIZE-1)
|
||||
) pending_reads (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (mem_req_fire && is_prim_id_req),
|
||||
.decr (valid_out && ready_out),
|
||||
.full (pending_output_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
assign mem_req_valid_qual = mem_req_valid && (~is_prim_id_req || ~pending_output_full);
|
||||
|
||||
// the memory response is for primitive id
|
||||
assign prim_id_rsp_valid = mem_rsp_valid && is_prim_id_rsp;
|
||||
|
||||
// the memory response is for primitive data
|
||||
assign prim_data_rsp_valid = mem_rsp_valid && is_prim_data_rsp;
|
||||
|
||||
// stall primitive address handling if primitive data fetch stalls
|
||||
wire prim_data_req_stall = mem_req_valid && is_prim_data_req && ~mem_req_ready;
|
||||
assign prim_addr_rsp_ready = ~prim_data_req_stall || ~prim_addr_rsp_valid;
|
||||
|
||||
// Push primitive data into output buffer
|
||||
assign buf_in_valid = prim_data_rsp_valid;
|
||||
|
||||
// stall the memory response
|
||||
assign mem_rsp_ready = (~prim_id_rsp_valid || prim_addr_rsp_ready)
|
||||
&& (~prim_data_rsp_valid || buf_in_ready);
|
||||
|
||||
wire [8:0][`RCACHE_ADDR_WIDTH-1:0] mem_req_addr_w;
|
||||
for (genvar i = 0; i < 9; ++i) begin
|
||||
|
@ -204,7 +252,7 @@ module VX_raster_mem #(
|
|||
.reset (reset),
|
||||
|
||||
// Input request
|
||||
.req_valid (mem_req_valid),
|
||||
.req_valid (mem_req_valid_qual),
|
||||
.req_rw (1'b0),
|
||||
.req_mask (mem_req_mask),
|
||||
`UNUSED_PIN (req_byteen),
|
||||
|
@ -215,7 +263,7 @@ module VX_raster_mem #(
|
|||
|
||||
// Output response
|
||||
.rsp_valid (mem_rsp_valid),
|
||||
.rsp_mask (mem_rsp_mask),
|
||||
`UNUSED_PIN (rsp_mask),
|
||||
.rsp_data (mem_rsp_data),
|
||||
.rsp_tag (mem_rsp_tag),
|
||||
.rsp_ready (mem_rsp_ready),
|
||||
|
@ -246,7 +294,7 @@ module VX_raster_mem #(
|
|||
.LATENCY (MUL_LATENCY)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.enable (1'b1),
|
||||
.enable (prim_addr_rsp_ready),
|
||||
.dataa (mem_rsp_data[0]),
|
||||
.datab (dcrs.pbuf_stride),
|
||||
.result (prim_mem_offset)
|
||||
|
@ -256,9 +304,6 @@ module VX_raster_mem #(
|
|||
assign prim_mem_addr[i] = dcrs.pbuf_addr + prim_mem_offset + 4 * i;
|
||||
end
|
||||
|
||||
// only delay primitive addresses for multiplication (mask = 1)
|
||||
wire mem_rsp_valid_p = mem_rsp_valid && ~mem_rsp_mask[1];
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `RASTER_PID_BITS),
|
||||
.DEPTH (MUL_LATENCY),
|
||||
|
@ -266,17 +311,12 @@ module VX_raster_mem #(
|
|||
) mul_shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({mem_rsp_valid_p, mem_rsp_data[0][`RASTER_PID_BITS-1:0]}),
|
||||
.data_out ({prim_addr_rsp_valid, prim_id})
|
||||
.enable (prim_addr_rsp_ready),
|
||||
.data_in ({prim_id_rsp_valid, mem_rsp_data[0][`RASTER_PID_BITS-1:0]}),
|
||||
.data_out ({prim_addr_rsp_valid, primitive_id})
|
||||
);
|
||||
|
||||
// Output buffer
|
||||
|
||||
assign buf_out_valid = prim_data_rsp_valid
|
||||
&& ~prim_addr_rsp_valid;
|
||||
|
||||
`UNUSED_VAR (mem_rsp_mask)
|
||||
// Output buffer
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (PRIM_DATA_WIDTH),
|
||||
|
@ -285,10 +325,10 @@ module VX_raster_mem #(
|
|||
) buf_out (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (buf_out_valid),
|
||||
.ready_in (buf_out_ready),
|
||||
.data_in ({curr_x_loc, curr_y_loc, mem_rsp_tag, mem_rsp_data}),
|
||||
.data_out ({x_loc_out, y_loc_out, pid_out, edges_out}),
|
||||
.valid_in (buf_in_valid),
|
||||
.ready_in (buf_in_ready),
|
||||
.data_in ({curr_x_loc, curr_y_loc, mem_rsp_data, mem_rsp_tag[FETCH_FLAG_BITS +: `RASTER_PID_BITS]}),
|
||||
.data_out ({x_loc_out, y_loc_out, edges_out, pid_out}),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
`include "VX_raster_define.vh"
|
||||
|
||||
module VX_raster_qe #(
|
||||
parameter SLICE_ID = 1,
|
||||
parameter SLICE_ID = 0,
|
||||
parameter NUM_QUADS = 4
|
||||
) (
|
||||
input wire clk,
|
||||
|
|
|
@ -9,12 +9,11 @@
|
|||
|
||||
module VX_raster_slice #(
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter SLICE_ID = 1,
|
||||
parameter TILE_LOGSIZE = 6, // tile log size
|
||||
parameter SLICE_ID = 0,
|
||||
parameter TILE_LOGSIZE = 5, // tile log size
|
||||
parameter BLOCK_LOGSIZE = 2, // block log size
|
||||
parameter OUTPUT_QUADS = 4,
|
||||
parameter QUAD_FIFO_DEPTH = 1,
|
||||
parameter TILE_FIFO_DEPTH = 16
|
||||
parameter QUAD_FIFO_DEPTH = 4
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
`include "VX_raster_define.vh"
|
||||
|
||||
module VX_raster_te #(
|
||||
parameter TILE_LOGSIZE = 6,
|
||||
parameter TILE_LOGSIZE = 5,
|
||||
parameter BLOCK_LOGSIZE = 2
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -32,8 +32,8 @@ module VX_raster_te #(
|
|||
output wire [2:0][2:0][`RASTER_DATA_BITS-1:0] edges_out,
|
||||
input wire ready_out
|
||||
);
|
||||
localparam LEVEL_BITS = (TILE_LOGSIZE - BLOCK_LOGSIZE) + 1;
|
||||
localparam TILE_FIFO_DEPTH = 1 << (TILE_LOGSIZE - BLOCK_LOGSIZE);
|
||||
localparam LEVEL_BITS = (TILE_LOGSIZE - BLOCK_LOGSIZE) + 1;
|
||||
localparam TILE_FIFO_DEPTH = 1 << (2 * (TILE_LOGSIZE - BLOCK_LOGSIZE));
|
||||
localparam FIFO_DATA_WIDTH = 2 * `RASTER_DIM_BITS + 3 * `RASTER_DATA_BITS + LEVEL_BITS;
|
||||
|
||||
wire stall;
|
||||
|
@ -80,32 +80,32 @@ module VX_raster_te #(
|
|||
tile_valid <= 0;
|
||||
if (fifo_arb_valid) begin
|
||||
// select fifo input
|
||||
tile_valid <= 1;
|
||||
tile_x_loc <= fifo_x_loc;
|
||||
tile_y_loc <= fifo_y_loc;
|
||||
tile_edge_eval <= fifo_edge_eval;
|
||||
tile_level <= fifo_level;
|
||||
tile_valid <= 1;
|
||||
tile_x_loc <= fifo_x_loc;
|
||||
tile_y_loc <= fifo_y_loc;
|
||||
tile_edge_eval <= fifo_edge_eval;
|
||||
tile_level <= fifo_level;
|
||||
end else
|
||||
if (is_fifo_bypass) begin
|
||||
// fifo bypass first sub-tile
|
||||
tile_valid <= 1;
|
||||
tile_x_loc <= subtile_x_loc_r[0];
|
||||
tile_y_loc <= subtile_y_loc_r[0];
|
||||
tile_edge_eval <= subtile_edge_eval_r[0];
|
||||
tile_level <= subtile_level_r;
|
||||
tile_valid <= 1;
|
||||
tile_x_loc <= subtile_x_loc_r[0];
|
||||
tile_y_loc <= subtile_y_loc_r[0];
|
||||
tile_edge_eval <= subtile_edge_eval_r[0];
|
||||
tile_level <= subtile_level_r;
|
||||
end else
|
||||
if (valid_in && ~tile_valid) begin
|
||||
// select new tile input
|
||||
tile_valid <= 1;
|
||||
tile_extents <= extents_in;
|
||||
tile_edges <= edges_in;
|
||||
tile_pid <= pid_in;
|
||||
tile_x_loc <= x_loc_in;
|
||||
tile_y_loc <= y_loc_in;
|
||||
tile_edge_eval[0]<= edges_in[0][2];
|
||||
tile_edge_eval[1]<= edges_in[1][2];
|
||||
tile_edge_eval[2]<= edges_in[2][2];
|
||||
tile_level <= 0;
|
||||
tile_valid <= 1;
|
||||
tile_extents <= extents_in;
|
||||
tile_edges <= edges_in;
|
||||
tile_pid <= pid_in;
|
||||
tile_x_loc <= x_loc_in;
|
||||
tile_y_loc <= y_loc_in;
|
||||
tile_edge_eval[0] <= edges_in[0][2];
|
||||
tile_edge_eval[1] <= edges_in[1][2];
|
||||
tile_edge_eval[2] <= edges_in[2][2];
|
||||
tile_level <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,12 +3,11 @@
|
|||
module VX_raster_unit #(
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter NUM_SLICES = 1, // number of raster slices
|
||||
parameter TILE_LOGSIZE = 6, // tile log size
|
||||
parameter TILE_LOGSIZE = 5, // tile log size
|
||||
parameter BLOCK_LOGSIZE = 2, // block log size
|
||||
parameter MEM_FIFO_DEPTH = 8, // memory queue size
|
||||
parameter TILE_FIFO_DEPTH = (1 << (2 * (TILE_LOGSIZE - BLOCK_LOGSIZE))), // tile queue size
|
||||
parameter QUAD_FIFO_DEPTH = 16, // quad queue size
|
||||
parameter OUTPUT_QUADS = 4 // number of output quads
|
||||
parameter MEM_FIFO_DEPTH = 4, // memory queue size
|
||||
parameter QUAD_FIFO_DEPTH = 4, // quad queue size
|
||||
parameter OUTPUT_QUADS = 4 // number of output quads
|
||||
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -35,7 +34,6 @@ module VX_raster_unit #(
|
|||
localparam PRIM_DATA_WIDTH = 2 * `RASTER_DIM_BITS + `RASTER_PID_BITS + 9 * `RASTER_DATA_BITS + 3 * `RASTER_DATA_BITS;
|
||||
|
||||
`STATIC_ASSERT(TILE_LOGSIZE > BLOCK_LOGSIZE, ("invalid parameter"))
|
||||
`STATIC_ASSERT(TILE_FIFO_DEPTH >= (1 << (2 * (TILE_LOGSIZE - BLOCK_LOGSIZE))), ("invalid parameter"))
|
||||
|
||||
raster_dcrs_t raster_dcrs;
|
||||
assign raster_dcrs = raster_dcr_if.data;
|
||||
|
@ -191,8 +189,7 @@ module VX_raster_unit #(
|
|||
.TILE_LOGSIZE (TILE_LOGSIZE),
|
||||
.BLOCK_LOGSIZE (BLOCK_LOGSIZE),
|
||||
.OUTPUT_QUADS (OUTPUT_QUADS),
|
||||
.QUAD_FIFO_DEPTH (QUAD_FIFO_DEPTH),
|
||||
.TILE_FIFO_DEPTH (TILE_FIFO_DEPTH)
|
||||
.QUAD_FIFO_DEPTH (QUAD_FIFO_DEPTH)
|
||||
) raster_slice (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -71,7 +71,7 @@ double sc_time_stamp() {
|
|||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
|
|
|
@ -134,6 +134,8 @@ int render(const CGLTrace& trace) {
|
|||
std::cout << "render" << std::endl;
|
||||
auto time_begin = std::chrono::high_resolution_clock::now();
|
||||
|
||||
uint32_t draw_idx = 0;
|
||||
|
||||
// render each draw call
|
||||
for (auto& drawcall : trace.drawcalls) {
|
||||
auto& states = drawcall.states;
|
||||
|
@ -143,7 +145,7 @@ int render(const CGLTrace& trace) {
|
|||
|
||||
// Perform tile binning
|
||||
auto num_tiles = Binning(tilebuf, primbuf, drawcall.vertices, drawcall.primitives, dst_width, dst_height, drawcall.viewport.near, drawcall.viewport.far, tile_size);
|
||||
std::cout << "Binning allocated " << std::dec << num_tiles << " tiles with " << primbuf.size() << " total primitives." << std::endl;
|
||||
std::cout << "Binning allocated " << std::dec << num_tiles << " tiles with " << (primbuf.size() / sizeof(rast_prim_t)) << " total primitives." << std::endl;
|
||||
if (0 == num_tiles)
|
||||
continue;
|
||||
|
||||
|
@ -337,6 +339,12 @@ int render(const CGLTrace& trace) {
|
|||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
if (draw_idx < trace.drawcalls.size()-1) {
|
||||
vx_dump_perf(device, stdout);
|
||||
}
|
||||
|
||||
++draw_idx;
|
||||
}
|
||||
|
||||
// download destination buffer
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue