gpu issue perf counters

This commit is contained in:
Blaise Tine 2022-07-28 23:39:41 -07:00
parent 500afe661e
commit 9e536b57c8
35 changed files with 456 additions and 1000 deletions

View file

@ -63,8 +63,8 @@ module VX_alu_unit #(
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`INST_ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
default: msc_result[i] = 'x;
endcase
end
end
@ -76,8 +76,7 @@ module VX_alu_unit #(
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
: shr_result[i]; // SRL, SRA
// 2'b11,
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
2'b11: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
endcase
end
end

View file

@ -314,12 +314,12 @@
// Size of texture Request Queue
`ifndef TEX_REQ_QUEUE_SIZE
`define TEX_REQ_QUEUE_SIZE `MAX(2, `NUM_WARPS * 2)
`define TEX_REQ_QUEUE_SIZE `MAX(2, `NUM_WARPS)
`endif
// Texture Unit memory pending Queue
// Texture Unit memory pending Queue (quad=4)
`ifndef TEX_MEM_QUEUE_SIZE
`define TEX_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS)
`define TEX_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS * 4)
`endif
// Raster Units ////////////////////////////////////////////////////////////////
@ -356,7 +356,7 @@
// RASTER memory queue size
`ifndef RASTER_MEM_FIFO_DEPTH
`define RASTER_MEM_FIFO_DEPTH 4
`define RASTER_MEM_FIFO_DEPTH 8
`endif
// Rop Units ///////////////////////////////////////////////////////////////////
@ -366,9 +366,9 @@
`define NUM_ROP_UNITS `UP(`NUM_CORES / 16)
`endif
// ROP memory pending size
// ROP memory pending size (quad=4)
`ifndef ROP_MEM_QUEUE_SIZE
`define ROP_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS)
`define ROP_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS * 4)
`endif
// Icache Configurable Knobs //////////////////////////////////////////////////
@ -537,12 +537,12 @@
// Size of cache in bytes
`ifndef TCACHE_SIZE
`define TCACHE_SIZE 4096
`define TCACHE_SIZE 8192
`endif
// Number of banks
`ifndef TCACHE_NUM_BANKS
`define TCACHE_NUM_BANKS `NUM_THREADS
`define TCACHE_NUM_BANKS 1
`endif
// Number of ports per bank
@ -562,7 +562,7 @@
// Miss Handling Register Size
`ifndef TCACHE_MSHR_SIZE
`define TCACHE_MSHR_SIZE 8
`define TCACHE_MSHR_SIZE (8 * 4)
`endif
// Memory Request Queue Size
@ -670,7 +670,7 @@
// Number of banks
`ifndef OCACHE_NUM_BANKS
`define OCACHE_NUM_BANKS `NUM_THREADS
`define OCACHE_NUM_BANKS 1
`endif
// Number of ports per bank
@ -690,7 +690,7 @@
// Miss Handling Register Size
`ifndef OCACHE_MSHR_SIZE
`define OCACHE_MSHR_SIZE 8
`define OCACHE_MSHR_SIZE (8 * 4)
`endif
// Memory Request Queue Size
@ -712,7 +712,11 @@
// Size of cache in bytes
`ifndef L2_CACHE_SIZE
`ifdef ALTERA_S10
`define L2_CACHE_SIZE 2097152
`else
`define L2_CACHE_SIZE 1048576
`endif
`endif
// Number of banks
@ -759,8 +763,12 @@
// Size of cache in bytes
`ifndef L3_CACHE_SIZE
`ifdef ALTERA_S10
`define L3_CACHE_SIZE 2097152
`else
`define L3_CACHE_SIZE 1048576
`endif
`endif
// Number of banks
`ifndef L3_NUM_BANKS

View file

@ -18,6 +18,7 @@ module VX_csr_data #(
`ifdef PERF_ENABLE
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
VX_perf_gpu_if.slave perf_gpu_if,
`ifdef EXT_TEX_ENABLE
VX_tex_perf_if.slave perf_tex_if,
VX_perf_cache_if.slave perf_tcache_if,
@ -217,7 +218,10 @@ module VX_csr_data #(
`CSR_MPM_MEM_WRITES : read_data_ro_r = perf_memsys_if.mem_writes[31:0];
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT : read_data_ro_r = perf_memsys_if.mem_latency[31:0];
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
// PERF: wctl
`CSR_MPM_WCTL_ISSUE_ST : read_data_ro_r = perf_gpu_if.wctl_stalls[31:0];
`CSR_MPM_WCTL_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.wctl_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
end
@ -239,6 +243,8 @@ module VX_csr_data #(
`CSR_MPM_TCACHE_MSHR_ST :read_data_ro_r = perf_tcache_if.mshr_stalls[31:0];
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_TEX_ISSUE_ST : read_data_ro_r = perf_gpu_if.tex_stalls[31:0];
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif
@ -263,6 +269,8 @@ module VX_csr_data #(
`CSR_MPM_RCACHE_MSHR_ST :read_data_ro_r = perf_rcache_if.mshr_stalls[31:0];
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_RASTER_ISSUE_ST : read_data_ro_r = perf_gpu_if.raster_stalls[31:0];
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif
@ -293,6 +301,8 @@ module VX_csr_data #(
`CSR_MPM_OCACHE_MSHR_ST :read_data_ro_r = perf_ocache_if.mshr_stalls[31:0];
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_ROP_ISSUE_ST : read_data_ro_r = perf_gpu_if.rop_stalls[31:0];
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif
@ -312,6 +322,13 @@ module VX_csr_data #(
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
// perf_gpu_if.imadd_stalls is copied into a local wire and handed to UNUSED_VAR.
// NOTE(review): no CSR read case for this counter is visible in this diff; presumably this
// only silences unused-signal warnings until a CSR address is assigned - confirm.
`ifdef PERF_ENABLE
`ifdef EXT_IMADD_ENABLE
wire [`PERF_CTR_BITS-1:0] perf_imadd_stalls = perf_gpu_if.imadd_stalls;
`UNUSED_VAR (perf_imadd_stalls);
`endif
`endif
`ifdef EXT_F_ENABLE
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
`endif

View file

@ -16,6 +16,7 @@ module VX_csr_unit #(
`ifdef PERF_ENABLE
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
VX_perf_gpu_if.slave perf_gpu_if,
`endif
`ifdef EXT_TEX_ENABLE
@ -148,6 +149,7 @@ module VX_csr_unit #(
`ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if),
.perf_pipeline_if(perf_pipeline_if),
.perf_gpu_if (perf_gpu_if),
`ifdef EXT_TEX_ENABLE
.perf_tex_if (perf_tex_if),
.perf_tcache_if (perf_tcache_if),

View file

@ -101,6 +101,10 @@ module VX_execute #(
VX_fpu_to_csr_if fpu_to_csr_if();
`endif
`ifdef PERF_ENABLE
VX_perf_gpu_if perf_gpu_if();
`endif
`RESET_RELAY (alu_reset, reset);
`RESET_RELAY (lsu_reset, reset);
`RESET_RELAY (csr_reset, reset);
@ -140,6 +144,7 @@ module VX_execute #(
`ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if),
.perf_pipeline_if(perf_pipeline_if),
.perf_gpu_if (perf_gpu_if),
`endif
.gpu_pending (gpu_pending),
@ -206,6 +211,10 @@ module VX_execute #(
.clk (clk),
.reset (gpu_reset),
.gpu_req_if (gpu_req_if),
`ifdef PERF_ENABLE
.perf_gpu_if (perf_gpu_if),
`endif
`ifdef EXT_TEX_ENABLE
.tex_csr_if (tex_csr_if),

View file

@ -13,6 +13,10 @@ module VX_gpu_unit #(
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_gpu_if.master perf_gpu_if,
`endif
// Inputs
VX_gpu_req_if.slave gpu_req_if,
@ -114,7 +118,7 @@ module VX_gpu_unit #(
assign barrier.size_m1 = `UP(`NW_BITS)'(rs2_data - 1);
// Warp control response
wire wctl_req_valid = gpu_req_valid & (is_wspawn | is_tmc | is_split | is_join | is_bar | is_pred);
wire wctl_req_valid = gpu_req_valid && (is_wspawn | is_tmc | is_split | is_join | is_bar | is_pred);
wire wctl_rsp_valid = wctl_req_valid;
wire [WCTL_DATAW-1:0] wctl_rsp_data = {tmc, wspawn, split, barrier};
wire wctl_rsp_ready;
@ -368,4 +372,60 @@ module VX_gpu_unit #(
end
assign req_pending = req_pending_r;
`ifdef PERF_ENABLE
// Texture issue-stall counter (only built when EXT_TEX_ENABLE is defined).
// Cleared on a clock edge while reset is high; otherwise it grows by one on every clock edge
// where tex_agent_if.valid is high and tex_agent_if.ready is low.
// The running total is exported on perf_gpu_if.tex_stalls.
`ifdef EXT_TEX_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_tex_stalls;
always @(posedge clk) begin
if (reset) begin
perf_tex_stalls <= 0;
end else begin
perf_tex_stalls <= perf_tex_stalls + `PERF_CTR_BITS'(tex_agent_if.valid && ~tex_agent_if.ready);
end
end
assign perf_gpu_if.tex_stalls = perf_tex_stalls;
`endif
// Raster issue-stall counter (only built when EXT_RASTER_ENABLE is defined).
// Cleared on a clock edge while reset is high; otherwise it grows by one on every clock edge
// where raster_agent_if.valid is high and raster_agent_if.ready is low.
// The running total is exported on perf_gpu_if.raster_stalls.
`ifdef EXT_RASTER_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_raster_stalls;
always @(posedge clk) begin
if (reset) begin
perf_raster_stalls <= 0;
end else begin
perf_raster_stalls <= perf_raster_stalls + `PERF_CTR_BITS'(raster_agent_if.valid && ~raster_agent_if.ready);
end
end
assign perf_gpu_if.raster_stalls = perf_raster_stalls;
`endif
// ROP issue-stall counter (only built when EXT_ROP_ENABLE is defined).
// Cleared on a clock edge while reset is high; otherwise it grows by one on every clock edge
// where rop_agent_if.valid is high and rop_agent_if.ready is low.
// The running total is exported on perf_gpu_if.rop_stalls.
`ifdef EXT_ROP_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_rop_stalls;
always @(posedge clk) begin
if (reset) begin
perf_rop_stalls <= 0;
end else begin
perf_rop_stalls <= perf_rop_stalls + `PERF_CTR_BITS'(rop_agent_if.valid && ~rop_agent_if.ready);
end
end
assign perf_gpu_if.rop_stalls = perf_rop_stalls;
`endif
// IMADD issue-stall counter (only built when EXT_IMADD_ENABLE is defined).
// Cleared on a clock edge while reset is high; otherwise it grows by one on every clock edge
// where imadd_valid_in is high and imadd_ready_in is low.
// The running total is exported on perf_gpu_if.imadd_stalls.
`ifdef EXT_IMADD_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_imadd_stalls;
always @(posedge clk) begin
if (reset) begin
perf_imadd_stalls <= 0;
end else begin
perf_imadd_stalls <= perf_imadd_stalls + `PERF_CTR_BITS'(imadd_valid_in && ~imadd_ready_in);
end
end
assign perf_gpu_if.imadd_stalls = perf_imadd_stalls;
`endif
// Warp-control issue-stall counter; unlike the per-extension counters it has no extension guard.
// Cleared on a clock edge while reset is high; otherwise it grows by one on every clock edge
// where wctl_req_valid is high and wctl_req_ready is low.
// NOTE(review): wctl_req_ready is declared outside the lines visible in this diff - confirm it
// is the ready signal paired with wctl_req_valid.
// The running total is exported on perf_gpu_if.wctl_stalls.
reg [`PERF_CTR_BITS-1:0] perf_wctl_stalls;
always @(posedge clk) begin
if (reset) begin
perf_wctl_stalls <= 0;
end else begin
perf_wctl_stalls <= perf_wctl_stalls + `PERF_CTR_BITS'(wctl_req_valid && ~wctl_req_ready);
end
end
assign perf_gpu_if.wctl_stalls = perf_wctl_stalls;
`endif
endmodule

View file

@ -216,8 +216,8 @@ module VX_issue #(
`endif
`EX_LSU: perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'(1);
`EX_CSR: perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'(1);
//`EX_GPU:
default: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'(1);
`EX_GPU: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'(1);
default:;
endcase
end
end

View file

@ -322,7 +322,8 @@ module VX_lsu_unit #(
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
default: rsp_data[i] = rsp_data32;
`INST_FMT_W: rsp_data[i] = rsp_data32;
default: rsp_data[i] = 'x;
endcase
end
end

View file

@ -107,6 +107,9 @@
`define CSR_MPM_MEM_WRITES_H 12'hB99
`define CSR_MPM_MEM_LAT 12'hB1A // memory latency
`define CSR_MPM_MEM_LAT_H 12'hB9A
// PERF: wctl (warp control) stall counter.
// The base address returns counter bits [31:0]; the _H address returns the bits from 32 upward.
`define CSR_MPM_WCTL_ISSUE_ST 12'hB1B // issue stalls
`define CSR_MPM_WCTL_ISSUE_ST_H 12'hB9B
// Machine Performance-monitoring texture counters
// PERF: texture unit
@ -123,6 +126,9 @@
`define CSR_MPM_TCACHE_BANK_ST_H 12'hB87
`define CSR_MPM_TCACHE_MSHR_ST 12'hB08 // MSHR stalls
`define CSR_MPM_TCACHE_MSHR_ST_H 12'hB88
// PERF: pipeline - texture unit stall counter.
// The base address returns counter bits [31:0]; the _H address returns the bits from 32 upward.
`define CSR_MPM_TEX_ISSUE_ST 12'hB09 // issue stalls
`define CSR_MPM_TEX_ISSUE_ST_H 12'hB89
// Machine Performance-monitoring raster counters
// PERF: raster unit
@ -141,6 +147,9 @@
`define CSR_MPM_RCACHE_BANK_ST_H 12'hB88
`define CSR_MPM_RCACHE_MSHR_ST 12'hB09 // MSHR stalls
`define CSR_MPM_RCACHE_MSHR_ST_H 12'hB89
// PERF: pipeline - raster unit stall counter.
// The base address returns counter bits [31:0]; the _H address returns the bits from 32 upward.
`define CSR_MPM_RASTER_ISSUE_ST 12'hB0A // issue stalls
`define CSR_MPM_RASTER_ISSUE_ST_H 12'hB8A
// Machine Performance-monitoring rop counters
// PERF: rop unit
@ -165,6 +174,9 @@
`define CSR_MPM_OCACHE_BANK_ST_H 12'hB8B
`define CSR_MPM_OCACHE_MSHR_ST 12'hB0C // MSHR stalls
`define CSR_MPM_OCACHE_MSHR_ST_H 12'hB8C
// PERF: pipeline - ROP unit stall counter.
// The base address returns counter bits [31:0]; the _H address returns the bits from 32 upward.
`define CSR_MPM_ROP_ISSUE_ST 12'hB0D // issue stalls
`define CSR_MPM_ROP_ISSUE_ST_H 12'hB8D
// Machine Information Registers

View file

@ -52,6 +52,7 @@ module VX_warp_sched #(
wire [`NUM_THREADS-1:0] schedule_tmask;
wire [31:0] schedule_pc;
wire schedule_valid;
wire schedule_ready;
wire warp_scheduled;
reg [`PERF_CTR_BITS-1:0] cycles;
@ -243,9 +244,7 @@ module VX_warp_sched #(
assign {schedule_tmask, schedule_pc} = schedule_data[schedule_wid];
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
assign warp_scheduled = schedule_valid && ~stall_out;
assign warp_scheduled = schedule_valid && schedule_ready;
`ifdef SIMULATION
assign instr_uuid = (issued_instrs[schedule_wid] * `NUM_WARPS * `NUM_CORES * `NUM_CLUSTERS)
@ -255,15 +254,18 @@ module VX_warp_sched #(
assign instr_uuid = 0;
`endif
VX_pipe_register #(
.DATAW (1 + `UP(`UUID_BITS) + `NUM_THREADS + 32 + `UP(`NW_BITS)),
.RESETW (1)
VX_generic_buffer #(
.DATAW (`UP(`UUID_BITS) + `NUM_THREADS + 32 + `UP(`NW_BITS)),
.OUT_REG (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({schedule_valid, instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
.data_out ({ifetch_req_if.valid, ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
.valid_in (schedule_valid),
.ready_in (schedule_ready),
.data_in ({instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
.data_out ({ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}),
.valid_out (ifetch_req_if.valid),
.ready_out (ifetch_req_if.ready)
);
assign busy = (active_warps != 0);

View file

@ -303,9 +303,9 @@ module VX_mem_scheduler #(
// Handle memory responses ////////////////////////////////////////////////
reg [QUEUE_SIZE-1:0][REQ_SIZEW-1:0] rsp_rem_size;
wire [REQ_SIZEW-1:0] rsp_rem_size_n;
wire [`UP(BATCH_SEL_BITS)-1:0] rsp_batch_idx;
reg [REQ_SIZEW-1:0] rsp_rem_size [QUEUE_SIZE-1:0];
wire [REQ_SIZEW-1:0] rsp_rem_size_n;
wire [`UP(BATCH_SEL_BITS)-1:0] rsp_batch_idx;
// Select memory response
VX_mem_rsp_sel #(
@ -329,10 +329,18 @@ module VX_mem_scheduler #(
);
wire [REQ_SIZEW-1:0] reqq_size;
wire [NUM_BANKS-1:0] mem_rsp_mask_x;
`POP_COUNT(reqq_size, reqq_mask);
wire [BANK_SIZEW-1:0] mem_rsp_size;
`POP_COUNT(mem_rsp_size, mem_rsp_mask_s);
if (NUM_BANKS > 1) begin
`POP_COUNT(mem_rsp_size, mem_rsp_mask_s);
assign mem_rsp_mask_x = mem_rsp_mask_s;
end else begin
assign mem_rsp_size = 1'b1;
assign mem_rsp_mask_x = 1'b1;
`UNUSED_VAR (mem_rsp_mask_s)
end
if (NUM_BATCHES > 1) begin
assign rsp_batch_idx = mem_rsp_tag_s[QUEUE_ADDRW +: BATCH_SEL_BITS];
@ -363,49 +371,40 @@ module VX_mem_scheduler #(
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
assign crsp_mask[i * NUM_BANKS +: SIZE] = {SIZE{(i == rsp_batch_idx)}} & mem_rsp_mask_s[SIZE-1:0];
assign crsp_mask[i * NUM_BANKS +: SIZE] = {SIZE{(i == rsp_batch_idx)}} & mem_rsp_mask_x[SIZE-1:0];
assign crsp_data[i * NUM_BANKS +: SIZE] = mem_rsp_data_s[SIZE-1:0];
end
end else begin
reg [QUEUE_SIZE-1:0][NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store;
reg [NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store_n;
reg [NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store [QUEUE_SIZE-1:0];
reg [QUEUE_SIZE-1:0][NUM_REQS-1:0] rsp_orig_mask;
wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] mem_rsp_data_m;
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
assign crsp_valid = mem_rsp_valid_s & rsp_complete;
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign mem_rsp_data_m[i] = {DATA_WIDTH{mem_rsp_mask_s[i]}} & mem_rsp_data_s[i];
end
always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr];
rsp_store_n[rsp_batch_idx] |= mem_rsp_data_m;
end
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
assign crsp_data[i * NUM_BANKS +: SIZE] = rsp_store_n[i][SIZE-1:0];
assign mem_rsp_data_m[i] = {DATA_WIDTH{mem_rsp_mask_x[i]}} & mem_rsp_data_s[i];
end
always @(posedge clk) begin
if (reset) begin
rsp_store <= '0;
end else begin
if (ibuf_push) begin
rsp_store[ibuf_waddr] <= '0;
rsp_orig_mask[ibuf_waddr] <= req_mask;
end
if (mem_rsp_fire) begin
rsp_store[ibuf_raddr] <= rsp_store_n;
end
if (ibuf_push) begin
rsp_store[ibuf_waddr] <= '0;
rsp_orig_mask[ibuf_waddr] <= req_mask;
end
if (mem_rsp_fire) begin
rsp_store[ibuf_raddr][rsp_batch_idx] <= mem_rsp_data_m;
end
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
assign crsp_valid = mem_rsp_valid_s & rsp_complete;
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
assign crsp_data[i * NUM_BANKS +: SIZE] = rsp_store[ibuf_raddr][i][SIZE-1:0]
| ({(SIZE * DATA_WIDTH){(i == rsp_batch_idx)}} & mem_rsp_data_m[SIZE-1:0]);
end
end
@ -455,24 +454,28 @@ module VX_mem_scheduler #(
`UNUSED_VAR (mem_req_dbg_uuid)
`UNUSED_VAR (mem_rsp_dbg_uuid)
reg [QUEUE_SIZE-1:0][(`UP(`UUID_BITS) + TAG_ONLY_WIDTH + 64 + 1)-1:0] pending_reqs;
reg [(`UP(`UUID_BITS) + TAG_ONLY_WIDTH + 64)-1:0] pending_reqs [QUEUE_SIZE-1:0];
reg [QUEUE_SIZE-1:0] pending_req_valids;
always @(posedge clk) begin
if (reset) begin
pending_reqs <= '0;
end begin
pending_req_valids <= '0;
end else begin
if (ibuf_push) begin
pending_reqs[ibuf_waddr] <= {req_dbg_uuid, req_tag_only, $time, 1'b1};
pending_reqs[ibuf_waddr] <= {req_dbg_uuid, req_tag_only, $time};
pending_req_valids[ibuf_waddr] <= 1'b1;
end
if (ibuf_pop) begin
pending_reqs[ibuf_raddr] <= '0;
pending_req_valids[ibuf_raddr] <= 1'b0;
end
end
for (integer i = 0; i < QUEUE_SIZE; ++i) begin
if (pending_reqs[i][0]) begin
`ASSERT(($time - pending_reqs[i][1 +: 64]) < `STALL_TIMEOUT,
if (pending_req_valids[i]) begin
`ASSERT(($time - pending_reqs[i][0 +: 64]) < `STALL_TIMEOUT,
("%t: *** %s response timeout: remaining=%0d, tag=0x%0h (#%0d)",
$time, INSTANCE_ID, rsp_rem_size[i], pending_reqs[i][1+64 +: TAG_ONLY_WIDTH], pending_reqs[i][1+64+TAG_ONLY_WIDTH +: `UP(`UUID_BITS)]));
$time, INSTANCE_ID, rsp_rem_size[i], pending_reqs[i][64 +: TAG_ONLY_WIDTH], pending_reqs[i][64+TAG_ONLY_WIDTH +: `UP(`UUID_BITS)]));
end
end
end

View file

@ -10,7 +10,94 @@ module VX_onehot_mux #(
input wire [N-1:0] sel_in,
output wire [DATAW-1:0] data_out
);
if (N > 1) begin
if (N == 1) begin
`UNUSED_VAR (sel_in)
assign data_out = data_in;
end else if (N == 2) begin
`UNUSED_VAR (sel_in)
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
end else if (N == 3) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
3'b001: data_out_r = data_in[0];
3'b010: data_out_r = data_in[1];
3'b100: data_out_r = data_in[2];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 4) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
4'b0001: data_out_r = data_in[0];
4'b0010: data_out_r = data_in[1];
4'b0100: data_out_r = data_in[2];
4'b1000: data_out_r = data_in[3];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 5) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
5'b00001: data_out_r = data_in[0];
5'b00010: data_out_r = data_in[1];
5'b00100: data_out_r = data_in[2];
5'b01000: data_out_r = data_in[3];
5'b10000: data_out_r = data_in[4];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 6) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
6'b000001: data_out_r = data_in[0];
6'b000010: data_out_r = data_in[1];
6'b000100: data_out_r = data_in[2];
6'b001000: data_out_r = data_in[3];
6'b010000: data_out_r = data_in[4];
6'b100000: data_out_r = data_in[5];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 7) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
7'b0000001: data_out_r = data_in[0];
7'b0000010: data_out_r = data_in[1];
7'b0000100: data_out_r = data_in[2];
7'b0001000: data_out_r = data_in[3];
7'b0010000: data_out_r = data_in[4];
7'b0100000: data_out_r = data_in[5];
7'b1000000: data_out_r = data_in[6];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else if (N == 8) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
case (sel_in)
8'b00000001: data_out_r = data_in[0];
8'b00000010: data_out_r = data_in[1];
8'b00000100: data_out_r = data_in[2];
8'b00001000: data_out_r = data_in[3];
8'b00010000: data_out_r = data_in[4];
8'b00100000: data_out_r = data_in[5];
8'b01000000: data_out_r = data_in[6];
8'b10000000: data_out_r = data_in[7];
default: data_out_r = 'x;
endcase
end
assign data_out = data_out_r;
end else begin
if (MODEL == 1) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
@ -21,7 +108,7 @@ module VX_onehot_mux #(
end
end
end
assign data_out = data_out_r;
assign data_out = data_out_r;
end else if (MODEL == 2) begin
reg [DATAW-1:0] data_out_r;
always @(*) begin
@ -44,9 +131,6 @@ module VX_onehot_mux #(
assign data_out[i] = (| gather);
end
end
end else begin
`UNUSED_VAR (sel_in)
assign data_out = data_in;
end
endmodule

View file

@ -114,7 +114,7 @@ module VX_raster_mem #(
if (reset) begin
state <= STATE_IDLE;
mem_req_valid <= 0;
end begin
end else begin
// deassert memory request when fired
if (mem_req_fire) begin
mem_req_valid <= 0;

View file

@ -75,7 +75,7 @@ module VX_raster_te #(
always @(posedge clk) begin
if (reset) begin
tile_valid <= 0;
end begin
end else begin
if (~stall) begin
tile_valid <= 0;
if (fifo_arb_valid) begin

View file

@ -272,7 +272,7 @@ module VX_raster_unit #(
if (reset) begin
perf_pending_reads <= 0;
end else begin
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'(perf_pending_reads_cycle);
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'($signed(perf_pending_reads_cycle));
end
end

View file

@ -266,40 +266,39 @@ module VX_rop_unit #(
.full (pending_reads_full),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
wire mem_req_stall = mem_req_valid_r & ~mem_req_ready_r;
VX_pipe_register #(
.DATAW (1 + 1 + NUM_LANES * (1 + 1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + `ROP_STENCIL_BITS + 1) + MEM_TAG_WIDTH),
.RESETW (1)
) mem_req_pipe_reg (
.clk (clk),
.reset (reset),
.enable (~mem_req_stall),
.data_in ({mem_req_valid, mem_req_rw, mem_req_mask, mem_req_ds_pass, mem_req_pos_x, mem_req_pos_y, mem_req_color, mem_req_depth, mem_req_stencil, mem_req_face, mem_req_tag}),
.data_out ({mem_req_valid_r, mem_req_rw_r, mem_req_mask_r, mem_req_ds_pass_r, mem_req_pos_x_r, mem_req_pos_y_r, mem_req_color_r, mem_req_depth_r, mem_req_stencil_r, mem_req_face_r, mem_req_tag_r})
);
assign mem_req_ready = ~mem_req_stall;
VX_generic_buffer #(
.DATAW (1 + NUM_LANES * (1 + 1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + `ROP_STENCIL_BITS + 1) + MEM_TAG_WIDTH),
.OUT_REG (1)
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid),
.ready_in (mem_req_ready),
.data_in ({mem_req_rw, mem_req_mask, mem_req_ds_pass, mem_req_pos_x, mem_req_pos_y, mem_req_color, mem_req_depth, mem_req_stencil, mem_req_face, mem_req_tag}),
.data_out ({mem_req_rw_r, mem_req_mask_r, mem_req_ds_pass_r, mem_req_pos_x_r, mem_req_pos_y_r, mem_req_color_r, mem_req_depth_r, mem_req_stencil_r, mem_req_face_r, mem_req_tag_r}),
.valid_out (mem_req_valid_r),
.ready_out (mem_req_ready_r)
);
`ifdef PERF_ENABLE
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rd_req_per_cycle;
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_wr_req_per_cycle;
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rsp_per_cycle;
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rd_rsp_per_cycle;
wire [$clog2(OCACHE_NUM_REQS+1)+1-1:0] perf_pending_reads_cycle;
wire [OCACHE_NUM_REQS-1:0] perf_mem_rd_req_per_mask = cache_req_if.valid & ~cache_req_if.rw & cache_req_if.ready;
wire [OCACHE_NUM_REQS-1:0] perf_mem_wr_req_per_mask = cache_req_if.valid & cache_req_if.rw & cache_req_if.ready;
wire [OCACHE_NUM_REQS-1:0] perf_mem_rsp_per_mask = cache_rsp_if.valid & cache_rsp_if.ready;
wire [OCACHE_NUM_REQS-1:0] perf_mem_rd_rsp_per_mask = cache_rsp_if.valid & cache_rsp_if.ready;
`POP_COUNT(perf_mem_rd_req_per_cycle, perf_mem_rd_req_per_mask);
`POP_COUNT(perf_mem_wr_req_per_cycle, perf_mem_wr_req_per_mask);
`POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_mask);
`POP_COUNT(perf_mem_rd_rsp_per_cycle, perf_mem_rd_rsp_per_mask);
reg [`PERF_CTR_BITS-1:0] perf_pending_reads;
assign perf_pending_reads_cycle = perf_mem_rd_req_per_cycle - perf_mem_rsp_per_cycle;
assign perf_pending_reads_cycle = perf_mem_rd_req_per_cycle - perf_mem_rd_rsp_per_cycle;
always @(posedge clk) begin
if (reset) begin

View file

@ -170,29 +170,29 @@ module VX_tex_mem #(
always @(*) begin
case (mem_rsp_lgstride)
0: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[7:0]);
1: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[15:0]);
default: mem_rsp_data_qual[i][j] = rsp_data_shifted;
0: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[7:0]);
1: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[15:0]);
2: mem_rsp_data_qual[i][j] = rsp_data_shifted;
default: mem_rsp_data_qual[i][j] = 'x;
endcase
end
end
end
wire stall_out = rsp_valid && ~rsp_ready;
VX_pipe_register #(
.DATAW (1 + REQ_INFOW + (4 * NUM_LANES * 32)),
.RESETW (1)
VX_generic_buffer #(
.DATAW (REQ_INFOW + (4 * NUM_LANES * 32)),
.OUT_REG (1)
) rsp_pipe_reg (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({mem_rsp_valid, mem_rsp_info, mem_rsp_data_qual}),
.data_out ({rsp_valid, rsp_info, rsp_data})
.clk (clk),
.reset (reset),
.valid_in (mem_rsp_valid),
.ready_in (mem_rsp_ready),
.data_in ({mem_rsp_info, mem_rsp_data_qual}),
.data_out ({rsp_info, rsp_data}),
.valid_out (rsp_valid),
.ready_out (rsp_ready)
);
assign mem_rsp_ready = ~stall_out;
`ifdef DBG_TRACE_TEX
always @(posedge clk) begin

View file

@ -14,9 +14,9 @@ module VX_tex_stride (
`TEX_FORMAT_A1R5G5B5,
`TEX_FORMAT_A4R4G4B4,
`TEX_FORMAT_A8L8: log_stride_r = 1;
// `TEX_FORMAT_L8:
// `TEX_FORMAT_A8:
default: log_stride_r = 0;
`TEX_FORMAT_L8,
`TEX_FORMAT_A8: log_stride_r = 0;
default: log_stride_r = 'x;
endcase
end

View file

@ -45,11 +45,11 @@ module VX_tex_unit #(
wire req_valid;
wire [NUM_LANES-1:0] req_mask;
logic [`TEX_FILTER_BITS-1:0] req_filter;
logic [`TEX_FORMAT_BITS-1:0] req_format;
logic [1:0][`TEX_WRAP_BITS-1:0] req_wraps;
wire [`TEX_FILTER_BITS-1:0] req_filter;
wire [`TEX_FORMAT_BITS-1:0] req_format;
wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps;
wire [1:0][`TEX_LOD_BITS-1:0] req_logdims;
logic [`TEX_ADDR_BITS-1:0] req_baseaddr;
wire [`TEX_ADDR_BITS-1:0] req_baseaddr;
wire [1:0][NUM_LANES-1:0][31:0] req_coords;
wire [NUM_LANES-1:0][`TEX_LOD_BITS-1:0] req_miplevel, sel_miplevel;
wire [NUM_LANES-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff, sel_mipoff;
@ -61,22 +61,20 @@ module VX_tex_unit #(
assign sel_mipoff[i] = tex_dcrs.mipoff[sel_miplevel[i]];
end
wire stall_in = req_valid && ~req_ready;
VX_pipe_register #(
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_FORMAT_BITS + 2 * `TEX_WRAP_BITS + 2 * `TEX_LOD_BITS + `TEX_ADDR_BITS + NUM_LANES * (2 * 32 + `TEX_LOD_BITS + `TEX_MIPOFF_BITS) + TAG_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~stall_in),
.data_in ({tex_req_if.valid, tex_req_if.mask, tex_dcrs.filter, tex_dcrs.format, tex_dcrs.wraps, tex_dcrs.logdims, tex_dcrs.baseaddr, tex_req_if.coords, sel_miplevel, sel_mipoff, tex_req_if.tag}),
.data_out ({req_valid, req_mask, req_filter, req_format, req_wraps, req_logdims, req_baseaddr, req_coords, req_miplevel, req_mipoff, req_tag})
VX_generic_buffer #(
.DATAW (NUM_LANES + `TEX_FILTER_BITS + `TEX_FORMAT_BITS + 2 * `TEX_WRAP_BITS + 2 * `TEX_LOD_BITS + `TEX_ADDR_BITS + NUM_LANES * (2 * 32 + `TEX_LOD_BITS + `TEX_MIPOFF_BITS) + TAG_WIDTH),
.OUT_REG (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.valid_in (tex_req_if.valid),
.ready_in (tex_req_if.ready),
.data_in ({tex_req_if.mask, tex_dcrs.filter, tex_dcrs.format, tex_dcrs.wraps, tex_dcrs.logdims, tex_dcrs.baseaddr, tex_req_if.coords, sel_miplevel, sel_mipoff, tex_req_if.tag}),
.data_out ({req_mask, req_filter, req_format, req_wraps, req_logdims, req_baseaddr, req_coords, req_miplevel, req_mipoff, req_tag}),
.valid_out (req_valid),
.ready_out (req_ready)
);
// can accept new request?
assign tex_req_if.ready = ~stall_in;
// address generation
wire mem_req_valid;
@ -239,7 +237,7 @@ module VX_tex_unit #(
end
assign perf_tex_if.mem_reads = perf_mem_reads;
assign perf_tex_if.mem_latency = perf_pending_reads;
assign perf_tex_if.mem_latency = perf_mem_latency;
`endif
`ifdef DBG_TRACE_TEX

View file

@ -29,9 +29,18 @@ DBG_FLAGS += $(DBG_TRACE_FLAGS)
CONFIGS += -DEXT_GFX_ENABLE
#CONFIGS += -DNUM_ROP_UNITS=2
#CONFIGS += -DNUM_TEX_UNITS=4
#CONFIGS += -DNUM_RASTER_UNITS=2
#CONFIGS += -DTCACHE_NUM_BANKS=1
#CONFIGS += -DOCACHE_NUM_BANKS=1
#CONFIGS += -DOCACHE_NUM_BANKS=1
#CONFIGS += -DL1_DISABLE
#CONFIGS += -DSM_DISABLE
#CONFIGS += -DRCACHE_DISABLE -DOCACHE_DISABLE -DTCACHE_DISABLE
#CONFIGS += -DRCACHE_DISABLE
#CONFIGS += -DOCACHE_DISABLE
#CONFIGS += -DTCACHE_DISABLE
ifeq ($(DEVICE_FAMILY), stratix10)
CONFIGS += -DALTERA_S10

View file

@ -3,5 +3,4 @@
# FPGA programming
# first argument is the bitstream
echo "fpgaconf --bus 0xaf $1"
fpgaconf --bus 0xaf $1

View file

@ -21,6 +21,7 @@ set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
set_global_assignment -name MUX_RESTRUCTURE ON
@ -28,6 +29,7 @@ set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_DUPLICATION ON
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100

View file

@ -1,68 +1,70 @@
BUILD_DIR ?= build
BUILD_DIR_X=$(BUILD_DIR)_$(DEVICE_FAMILY)
.PHONY: dogfood unittest pipeline smem cache fpu_core core vortex vortex-gfx top top-gfx texunit test
dogfood:
mkdir -p dogfood/$(BUILD_DIR)
cp dogfood/Makefile dogfood/$(BUILD_DIR)
$(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p dogfood/$(BUILD_DIR_X)
cp dogfood/Makefile dogfood/$(BUILD_DIR_X)
$(MAKE) -C dogfood/$(BUILD_DIR_X) clean && $(MAKE) -C dogfood/$(BUILD_DIR_X) > dogfood/$(BUILD_DIR_X)/build.log 2>&1 &
unittest:
mkdir -p unittest/$(BUILD_DIR)
cp unittest/Makefile unittest/$(BUILD_DIR)
$(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p unittest/$(BUILD_DIR_X)
cp unittest/Makefile unittest/$(BUILD_DIR_X)
$(MAKE) -C unittest/$(BUILD_DIR_X) clean && $(MAKE) -C unittest/$(BUILD_DIR_X) > unittest/$(BUILD_DIR_X)/build.log 2>&1 &
pipeline:
mkdir -p pipeline/$(BUILD_DIR)
cp pipeline/Makefile pipeline/$(BUILD_DIR)
$(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p pipeline/$(BUILD_DIR_X)
cp pipeline/Makefile pipeline/$(BUILD_DIR_X)
$(MAKE) -C pipeline/$(BUILD_DIR_X) clean && $(MAKE) -C pipeline/$(BUILD_DIR_X) > pipeline/$(BUILD_DIR_X)/build.log 2>&1 &
smem:
mkdir -p smem/$(BUILD_DIR)
cp smem/Makefile smem/$(BUILD_DIR)
$(MAKE) -C smem/$(BUILD_DIR) clean && $(MAKE) -C smem/$(BUILD_DIR) > smem/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p smem/$(BUILD_DIR_X)
cp smem/Makefile smem/$(BUILD_DIR_X)
$(MAKE) -C smem/$(BUILD_DIR_X) clean && $(MAKE) -C smem/$(BUILD_DIR_X) > smem/$(BUILD_DIR_X)/build.log 2>&1 &
cache:
mkdir -p cache/$(BUILD_DIR)
cp cache/Makefile cache/$(BUILD_DIR)
$(MAKE) -C cache/$(BUILD_DIR) clean && $(MAKE) -C cache/$(BUILD_DIR) > cache/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p cache/$(BUILD_DIR_X)
cp cache/Makefile cache/$(BUILD_DIR_X)
$(MAKE) -C cache/$(BUILD_DIR_X) clean && $(MAKE) -C cache/$(BUILD_DIR_X) > cache/$(BUILD_DIR_X)/build.log 2>&1 &
fpu_core:
mkdir -p fpu_core/$(BUILD_DIR)
cp fpu_core/Makefile fpu_core/$(BUILD_DIR)
$(MAKE) -C fpu_core/$(BUILD_DIR) clean && $(MAKE) -C fpu_core/$(BUILD_DIR) > fpu_core/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p fpu_core/$(BUILD_DIR_X)
cp fpu_core/Makefile fpu_core/$(BUILD_DIR_X)
$(MAKE) -C fpu_core/$(BUILD_DIR_X) clean && $(MAKE) -C fpu_core/$(BUILD_DIR_X) > fpu_core/$(BUILD_DIR_X)/build.log 2>&1 &
core:
mkdir -p core/$(BUILD_DIR)
cp core/Makefile core/$(BUILD_DIR)
$(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p core/$(BUILD_DIR_X)
cp core/Makefile core/$(BUILD_DIR_X)
$(MAKE) -C core/$(BUILD_DIR_X) clean && $(MAKE) -C core/$(BUILD_DIR_X) > core/$(BUILD_DIR_X)/build.log 2>&1 &
vortex:
mkdir -p vortex/$(BUILD_DIR)
cp vortex/Makefile vortex/$(BUILD_DIR)
$(MAKE) -C vortex/$(BUILD_DIR) clean && $(MAKE) -C vortex/$(BUILD_DIR) > vortex/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p vortex/$(BUILD_DIR_X)
cp vortex/Makefile vortex/$(BUILD_DIR_X)
$(MAKE) -C vortex/$(BUILD_DIR_X) clean && $(MAKE) -C vortex/$(BUILD_DIR_X) > vortex/$(BUILD_DIR_X)/build.log 2>&1 &
vortex-gfx:
mkdir -p vortex-gfx/$(BUILD_DIR)
cp vortex-gfx/Makefile vortex-gfx/$(BUILD_DIR)
$(MAKE) -C vortex-gfx/$(BUILD_DIR) clean && $(MAKE) -C vortex-gfx/$(BUILD_DIR) > vortex-gfx/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p vortex-gfx/$(BUILD_DIR_X)
cp vortex-gfx/Makefile vortex-gfx/$(BUILD_DIR_X)
$(MAKE) -C vortex-gfx/$(BUILD_DIR_X) clean && $(MAKE) -C vortex-gfx/$(BUILD_DIR_X) > vortex-gfx/$(BUILD_DIR_X)/build.log 2>&1 &
top:
mkdir -p top/$(BUILD_DIR)
cp top/Makefile top/$(BUILD_DIR)
$(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p top/$(BUILD_DIR_X)
cp top/Makefile top/$(BUILD_DIR_X)
$(MAKE) -C top/$(BUILD_DIR_X) clean && $(MAKE) -C top/$(BUILD_DIR_X) > top/$(BUILD_DIR_X)/build.log 2>&1 &
top-gfx:
mkdir -p top-gfx/$(BUILD_DIR)
cp top-gfx/Makefile top-gfx/$(BUILD_DIR)
$(MAKE) -C top-gfx/$(BUILD_DIR) clean && $(MAKE) -C top-gfx/$(BUILD_DIR) > top-gfx/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p top-gfx/$(BUILD_DIR_X)
cp top-gfx/Makefile top-gfx/$(BUILD_DIR_X)
$(MAKE) -C top-gfx/$(BUILD_DIR_X) clean && $(MAKE) -C top-gfx/$(BUILD_DIR_X) > top-gfx/$(BUILD_DIR_X)/build.log 2>&1 &
texunit:
mkdir -p texunit/$(BUILD_DIR)
cp texunit/Makefile texunit/$(BUILD_DIR)
$(MAKE) -C texunit/$(BUILD_DIR) clean && $(MAKE) -C texunit/$(BUILD_DIR) > texunit/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p texunit/$(BUILD_DIR_X)
cp texunit/Makefile texunit/$(BUILD_DIR_X)
$(MAKE) -C texunit/$(BUILD_DIR_X) clean && $(MAKE) -C texunit/$(BUILD_DIR_X) > texunit/$(BUILD_DIR_X)/build.log 2>&1 &
test:
mkdir -p test/$(BUILD_DIR)
cp test/Makefile test/$(BUILD_DIR)
$(MAKE) -C test/$(BUILD_DIR) clean && $(MAKE) -C test/$(BUILD_DIR) > test/$(BUILD_DIR)/build.log 2>&1 &
mkdir -p test/$(BUILD_DIR_X)
cp test/Makefile test/$(BUILD_DIR_X)
$(MAKE) -C test/$(BUILD_DIR_X) clean && $(MAKE) -C test/$(BUILD_DIR_X) > test/$(BUILD_DIR_X)/build.log 2>&1 &

View file

@ -1,72 +1,7 @@
PROJECT = VX_cache
TOP_LEVEL_ENTITY = VX_cache
SRC_FILE = VX_cache.sv
RTL_DIR = ../../../../../rtl
PROJECT = VX_cache_syn
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
include ../../common.mk
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,81 +1,9 @@
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
PROJECT = VX_core_syn
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,87 +1,8 @@
PROJECT = VX_fpu_fpga
TOP_LEVEL_ENTITY = VX_fpu_fpga
SRC_FILE = VX_fpu_fpga.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);$(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(IP_DIR)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
# -set "FPU_CVT2"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
timing: $(PROJECT_FILES)
quartus_sh -t ../../timing-html.tcl -project $(PROJECT)
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,72 +1,7 @@
PROJECT = VX_shared_mem
TOP_LEVEL_ENTITY = VX_shared_mem
SRC_FILE = VX_shared_mem.sv
RTL_DIR = ../../../../../rtl
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
include ../../common.mk
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,83 +1,11 @@
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
include ../../common.mk
CONFIGS += -set "EXT_GFX_ENABLE"
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
GFX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(GFX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,81 +1,9 @@
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
PROJECT = VX_tex_unit_syn
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "EXT_TEX_ENABLE"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,23 +1,16 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../../rtl
AFU_DIR = ../../../../../afu/opae
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
CONFIGS += -set "NOPAE"
CONFIGS += -set "EXT_GFX_ENABLE"
CONFIGS += -set "NUM_CORES=4"
CONFIGS += -set "L2_ENABLE"
#CONFIGS += -set "L1_DISABLE"
#CONFIGS += -set "SM_DISABLE"
#CONFIGS += -set "RCACHE_DISABLE" -set "OCACHE_DISABLE" -set "TCACHE_DISABLE"
@ -25,67 +18,3 @@ CONFIGS += -set "NUM_CORES=4"
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(AFU_DIR);$(AFU_DIR)/ccip;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,89 +1,18 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../../rtl
AFU_DIR = ../../../../../afu/opae
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
CONFIGS += -set "NOPAE"
CONFIGS += -set "NUM_CORES=4"
CONFIGS += -set "L2_ENABLE"
#CONFIGS += -set "L1_DISABLE"
#CONFIGS += -set "SM_DISABLE"
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(AFU_DIR);$(AFU_DIR)/ccip;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,81 +1,11 @@
PROJECT = Unittest
TOP_LEVEL_ENTITY = VX_req_dispatch
SRC_FILE = VX_req_dispatch.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

View file

@ -1,16 +1,8 @@
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
include ../../common.mk
CONFIGS += -set "EXT_GFX_ENABLE"
@ -24,72 +16,10 @@ CONFIGS += -set "EXT_GFX_ENABLE"
#CONFIGS += -set "NUM_WARPS=2" -set "NUM_THREADS=2"
CONFIGS += -set "NUM_CORES=4"
CONFIGS += -set "NUM_CORES=2"
CONFIGS += -set "L2_ENABLE"
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
GFX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(GFX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,16 +1,12 @@
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.sv
RTL_DIR = ../../../../../rtl
THIRD_PARTY_DIR = ../../../../../../third_party
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
IP_DIR = ../../../ip/arria10
include ../../common.mk
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#IP_DIR = ../../../ip/stratix10
CONFIGS += -set "NUM_CORES=2"
CONFIGS += -set "L2_ENABLE"
#CONFIGS += -set "L1_DISABLE"
@ -20,72 +16,6 @@ IP_DIR = ../../../ip/arria10
#CONFIGS += -set "NUM_WARPS=2" -set "NUM_THREADS=2"
CONFIGS += -set "NUM_CORES=4"
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -202,6 +202,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t csr_stalls = 0;
uint64_t alu_stalls = 0;
uint64_t gpu_stalls = 0;
uint64_t wctl_issue_stalls = 0;
// PERF: decode
uint64_t loads = 0;
uint64_t stores = 0;
@ -228,6 +229,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
// PERF: texunit
uint64_t tex_mem_reads = 0;
uint64_t tex_mem_lat = 0;
// PERF: tex issue
uint64_t tex_issue_stalls = 0;
// PERF: tex tcache
uint64_t tcache_reads = 0;
uint64_t tcache_read_misses = 0;
@ -238,6 +241,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t raster_mem_reads = 0;
uint64_t raster_mem_lat = 0;
uint64_t raster_stall_cycles = 0;
// PERF: raster issue
uint64_t raster_issue_stalls = 0;
// PERF: raster cache
uint64_t rcache_reads = 0;
uint64_t rcache_read_misses = 0;
@ -249,6 +254,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t rop_mem_writes = 0;
uint64_t rop_mem_lat = 0;
uint64_t rop_stall_cycles = 0;
// PERF: rop issue
uint64_t rop_issue_stalls = 0;
// PERF: rop ocache
uint64_t ocache_reads = 0;
uint64_t ocache_writes = 0;
@ -319,7 +326,11 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
// gpu_stall
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
gpu_stalls += gpu_stalls_per_core;
gpu_stalls += gpu_stalls_per_core;
// wctl_stall
uint64_t wctl_issue_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_WCTL_ISSUE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: wctl issue stalls=%ld\n", core_id, wctl_issue_stalls_per_core);
wctl_issue_stalls += wctl_issue_stalls_per_core;
// PERF: decode
// loads
@ -403,6 +414,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
tcache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_TCACHE_BANK_ST);
tcache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_TCACHE_MSHR_ST);
}
// issue_stall
uint64_t issue_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_TEX_ISSUE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: tex issue stalls=%ld\n", core_id, issue_stalls_per_core);
tex_issue_stalls += issue_stalls_per_core;
#endif
} break;
case DCR_MPM_CLASS_RASTER: {
@ -417,6 +432,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
rcache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_RCACHE_BANK_ST);
rcache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_RCACHE_MSHR_ST);
}
// issue_stall
uint64_t raster_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_RASTER_ISSUE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: raster issue stalls=%ld\n", core_id, raster_stalls_per_core);
raster_issue_stalls += raster_stalls_per_core;
#endif
} break;
case DCR_MPM_CLASS_ROP: {
@ -434,6 +453,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
ocache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_OCACHE_BANK_ST);
ocache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_OCACHE_MSHR_ST);
}
// issue_stall
uint64_t rop_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ROP_ISSUE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: rop issue stalls=%ld\n", core_id, rop_stalls_per_core);
rop_issue_stalls += rop_stalls_per_core;
#endif
} break;
default:
@ -461,6 +484,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
fprintf(stream, "PERF: wctl issue stalls=%ld\n", wctl_issue_stalls);
fprintf(stream, "PERF: loads=%ld\n", loads);
fprintf(stream, "PERF: stores=%ld\n", stores);
fprintf(stream, "PERF: branches=%ld\n", branches);
@ -483,6 +507,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
int tex_avg_lat = (int)(double(tex_mem_lat) / double(tex_mem_reads));
fprintf(stream, "PERF: tex memory reads=%ld\n", tex_mem_reads);
fprintf(stream, "PERF: tex memory average latency=%d cycles\n", tex_avg_lat);
fprintf(stream, "PERF: tex issue stalls=%ld\n", tex_issue_stalls);
int tcache_read_hit_ratio = (int)((1.0 - (double(tcache_read_misses) / double(tcache_reads))) * 100);
int tcache_bank_utilization = (int)((double(tcache_reads) / double(tcache_reads + tcache_bank_stalls)) * 100);
fprintf(stream, "PERF: tcache reads=%ld\n", tcache_reads);
@ -498,6 +523,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
fprintf(stream, "PERF: raster memory reads=%ld\n", raster_mem_reads);
fprintf(stream, "PERF: raster memory latency=%d cycles\n", raster_mem_avg_lat);
fprintf(stream, "PERF: raster stall cycles=%ld cycles (%d%%)\n", raster_stall_cycles, raster_stall_cycles_ratio);
fprintf(stream, "PERF: raster issue stalls=%ld\n", raster_issue_stalls);
// cache perf counters
int rcache_read_hit_ratio = (int)((1.0 - (double(rcache_read_misses) / double(rcache_reads))) * 100);
int rcache_bank_utilization = (int)((double(rcache_reads) / double(rcache_reads + rcache_bank_stalls)) * 100);
@ -515,6 +541,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
fprintf(stream, "PERF: rop memory writes=%ld\n", rop_mem_writes);
fprintf(stream, "PERF: rop memory average latency=%d cycles\n", rop_mem_avg_lat);
fprintf(stream, "PERF: rop stall cycles=%ld cycles (%d%%)\n", rop_stall_cycles, rop_stall_cycles_ratio);
fprintf(stream, "PERF: rop issue stalls=%ld\n", rop_issue_stalls);
// cache perf counters
int ocache_read_hit_ratio = (int)((1.0 - (double(ocache_read_misses) / double(ocache_reads))) * 100);
int ocache_write_hit_ratio = (int)((1.0 - (double(ocache_write_misses) / double(ocache_writes))) * 100);