mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
gpu issue perf counters
This commit is contained in:
parent
500afe661e
commit
9e536b57c8
35 changed files with 456 additions and 1000 deletions
|
@ -63,8 +63,8 @@ module VX_alu_unit #(
|
|||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`INST_ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
default: msc_result[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -76,8 +76,7 @@ module VX_alu_unit #(
|
|||
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
|
||||
: shr_result[i]; // SRL, SRA
|
||||
// 2'b11,
|
||||
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
|
||||
2'b11: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
|
|
@ -314,12 +314,12 @@
|
|||
|
||||
// Size of texture Request Queue
|
||||
`ifndef TEX_REQ_QUEUE_SIZE
|
||||
`define TEX_REQ_QUEUE_SIZE `MAX(2, `NUM_WARPS * 2)
|
||||
`define TEX_REQ_QUEUE_SIZE `MAX(2, `NUM_WARPS)
|
||||
`endif
|
||||
|
||||
// Texture Unit memory pending Queue
|
||||
// Texture Unit memory pending Queue (quad=4)
|
||||
`ifndef TEX_MEM_QUEUE_SIZE
|
||||
`define TEX_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS)
|
||||
`define TEX_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS * 4)
|
||||
`endif
|
||||
|
||||
// Raster Units ////////////////////////////////////////////////////////////////
|
||||
|
@ -356,7 +356,7 @@
|
|||
|
||||
// RASTER memory queue size
|
||||
`ifndef RASTER_MEM_FIFO_DEPTH
|
||||
`define RASTER_MEM_FIFO_DEPTH 4
|
||||
`define RASTER_MEM_FIFO_DEPTH 8
|
||||
`endif
|
||||
|
||||
// Rop Units ///////////////////////////////////////////////////////////////////
|
||||
|
@ -366,9 +366,9 @@
|
|||
`define NUM_ROP_UNITS `UP(`NUM_CORES / 16)
|
||||
`endif
|
||||
|
||||
// ROP memory pending size
|
||||
// ROP memory pending size (quad=4)
|
||||
`ifndef ROP_MEM_QUEUE_SIZE
|
||||
`define ROP_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS)
|
||||
`define ROP_MEM_QUEUE_SIZE `MAX(2, `NUM_WARPS * 4)
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
@ -537,12 +537,12 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef TCACHE_SIZE
|
||||
`define TCACHE_SIZE 4096
|
||||
`define TCACHE_SIZE 8192
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef TCACHE_NUM_BANKS
|
||||
`define TCACHE_NUM_BANKS `NUM_THREADS
|
||||
`define TCACHE_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
|
@ -562,7 +562,7 @@
|
|||
|
||||
// Miss Handling Register Size
|
||||
`ifndef TCACHE_MSHR_SIZE
|
||||
`define TCACHE_MSHR_SIZE 8
|
||||
`define TCACHE_MSHR_SIZE (8 * 4)
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
|
@ -670,7 +670,7 @@
|
|||
|
||||
// Number of banks
|
||||
`ifndef OCACHE_NUM_BANKS
|
||||
`define OCACHE_NUM_BANKS `NUM_THREADS
|
||||
`define OCACHE_NUM_BANKS 1
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
|
@ -690,7 +690,7 @@
|
|||
|
||||
// Miss Handling Register Size
|
||||
`ifndef OCACHE_MSHR_SIZE
|
||||
`define OCACHE_MSHR_SIZE 8
|
||||
`define OCACHE_MSHR_SIZE (8 * 4)
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
|
@ -712,7 +712,11 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L2_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L2_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
|
@ -759,8 +763,12 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L3_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
|
|
|
@ -18,6 +18,7 @@ module VX_csr_data #(
|
|||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
VX_perf_gpu_if.slave perf_gpu_if,
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_perf_if.slave perf_tex_if,
|
||||
VX_perf_cache_if.slave perf_tcache_if,
|
||||
|
@ -217,7 +218,10 @@ module VX_csr_data #(
|
|||
`CSR_MPM_MEM_WRITES : read_data_ro_r = perf_memsys_if.mem_writes[31:0];
|
||||
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT : read_data_ro_r = perf_memsys_if.mem_latency[31:0];
|
||||
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: wctl
|
||||
`CSR_MPM_WCTL_ISSUE_ST : read_data_ro_r = perf_gpu_if.wctl_stalls[31:0];
|
||||
`CSR_MPM_WCTL_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.wctl_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -239,6 +243,8 @@ module VX_csr_data #(
|
|||
`CSR_MPM_TCACHE_MSHR_ST :read_data_ro_r = perf_tcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_TEX_ISSUE_ST : read_data_ro_r = perf_gpu_if.tex_stalls[31:0];
|
||||
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
@ -263,6 +269,8 @@ module VX_csr_data #(
|
|||
`CSR_MPM_RCACHE_MSHR_ST :read_data_ro_r = perf_rcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_RASTER_ISSUE_ST : read_data_ro_r = perf_gpu_if.raster_stalls[31:0];
|
||||
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
@ -293,6 +301,8 @@ module VX_csr_data #(
|
|||
`CSR_MPM_OCACHE_MSHR_ST :read_data_ro_r = perf_ocache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_ROP_ISSUE_ST : read_data_ro_r = perf_gpu_if.rop_stalls[31:0];
|
||||
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
@ -312,6 +322,13 @@ module VX_csr_data #(
|
|||
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_IMADD_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] perf_imadd_stalls = perf_gpu_if.imadd_stalls;
|
||||
`UNUSED_VAR (perf_imadd_stalls);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
|
||||
`endif
|
||||
|
|
|
@ -16,6 +16,7 @@ module VX_csr_unit #(
|
|||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
VX_perf_gpu_if.slave perf_gpu_if,
|
||||
`endif
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
|
@ -148,6 +149,7 @@ module VX_csr_unit #(
|
|||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
.perf_gpu_if (perf_gpu_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
.perf_tcache_if (perf_tcache_if),
|
||||
|
|
|
@ -101,6 +101,10 @@ module VX_execute #(
|
|||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_gpu_if perf_gpu_if();
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
@ -140,6 +144,7 @@ module VX_execute #(
|
|||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
.perf_gpu_if (perf_gpu_if),
|
||||
`endif
|
||||
|
||||
.gpu_pending (gpu_pending),
|
||||
|
@ -206,6 +211,10 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (gpu_reset),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_gpu_if (perf_gpu_if),
|
||||
`endif
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
|
|
|
@ -13,6 +13,10 @@ module VX_gpu_unit #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_gpu_if.master perf_gpu_if,
|
||||
`endif
|
||||
|
||||
// Inputs
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
|
@ -114,7 +118,7 @@ module VX_gpu_unit #(
|
|||
assign barrier.size_m1 = `UP(`NW_BITS)'(rs2_data - 1);
|
||||
|
||||
// Warp control response
|
||||
wire wctl_req_valid = gpu_req_valid & (is_wspawn | is_tmc | is_split | is_join | is_bar | is_pred);
|
||||
wire wctl_req_valid = gpu_req_valid && (is_wspawn | is_tmc | is_split | is_join | is_bar | is_pred);
|
||||
wire wctl_rsp_valid = wctl_req_valid;
|
||||
wire [WCTL_DATAW-1:0] wctl_rsp_data = {tmc, wspawn, split, barrier};
|
||||
wire wctl_rsp_ready;
|
||||
|
@ -368,4 +372,60 @@ module VX_gpu_unit #(
|
|||
end
|
||||
assign req_pending = req_pending_r;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_tex_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_tex_stalls <= 0;
|
||||
end else begin
|
||||
perf_tex_stalls <= perf_tex_stalls + `PERF_CTR_BITS'(tex_agent_if.valid && ~tex_agent_if.ready);
|
||||
end
|
||||
end
|
||||
assign perf_gpu_if.tex_stalls = perf_tex_stalls;
|
||||
`endif
|
||||
`ifdef EXT_RASTER_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_raster_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_raster_stalls <= 0;
|
||||
end else begin
|
||||
perf_raster_stalls <= perf_raster_stalls + `PERF_CTR_BITS'(raster_agent_if.valid && ~raster_agent_if.ready);
|
||||
end
|
||||
end
|
||||
assign perf_gpu_if.raster_stalls = perf_raster_stalls;
|
||||
`endif
|
||||
`ifdef EXT_ROP_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_rop_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_rop_stalls <= 0;
|
||||
end else begin
|
||||
perf_rop_stalls <= perf_rop_stalls + `PERF_CTR_BITS'(rop_agent_if.valid && ~rop_agent_if.ready);
|
||||
end
|
||||
end
|
||||
assign perf_gpu_if.rop_stalls = perf_rop_stalls;
|
||||
`endif
|
||||
`ifdef EXT_IMADD_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_imadd_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_imadd_stalls <= 0;
|
||||
end else begin
|
||||
perf_imadd_stalls <= perf_imadd_stalls + `PERF_CTR_BITS'(imadd_valid_in && ~imadd_ready_in);
|
||||
end
|
||||
end
|
||||
assign perf_gpu_if.imadd_stalls = perf_imadd_stalls;
|
||||
`endif
|
||||
reg [`PERF_CTR_BITS-1:0] perf_wctl_stalls;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_wctl_stalls <= 0;
|
||||
end else begin
|
||||
perf_wctl_stalls <= perf_wctl_stalls + `PERF_CTR_BITS'(wctl_req_valid && ~wctl_req_ready);
|
||||
end
|
||||
end
|
||||
assign perf_gpu_if.wctl_stalls = perf_wctl_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -216,8 +216,8 @@ module VX_issue #(
|
|||
`endif
|
||||
`EX_LSU: perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'(1);
|
||||
`EX_CSR: perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'(1);
|
||||
//`EX_GPU:
|
||||
default: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'(1);
|
||||
`EX_GPU: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'(1);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
|
|
@ -322,7 +322,8 @@ module VX_lsu_unit #(
|
|||
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
default: rsp_data[i] = rsp_data32;
|
||||
`INST_FMT_W: rsp_data[i] = rsp_data32;
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
|
|
@ -107,6 +107,9 @@
|
|||
`define CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define CSR_MPM_MEM_LAT 12'hB1A // memory latency
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9A
|
||||
// PERF: wctl
|
||||
`define CSR_MPM_WCTL_ISSUE_ST 12'hB1B // issue stalls
|
||||
`define CSR_MPM_WCTL_ISSUE_ST_H 12'hB9B
|
||||
|
||||
// Machine Performance-monitoring texture counters
|
||||
// PERF: texture unit
|
||||
|
@ -123,6 +126,9 @@
|
|||
`define CSR_MPM_TCACHE_BANK_ST_H 12'hB87
|
||||
`define CSR_MPM_TCACHE_MSHR_ST 12'hB08 // MSHR stalls
|
||||
`define CSR_MPM_TCACHE_MSHR_ST_H 12'hB88
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_TEX_ISSUE_ST 12'hB09 // issue stalls
|
||||
`define CSR_MPM_TEX_ISSUE_ST_H 12'hB89
|
||||
|
||||
// Machine Performance-monitoring raster counters
|
||||
// PERF: raster unit
|
||||
|
@ -141,6 +147,9 @@
|
|||
`define CSR_MPM_RCACHE_BANK_ST_H 12'hB88
|
||||
`define CSR_MPM_RCACHE_MSHR_ST 12'hB09 // MSHR stalls
|
||||
`define CSR_MPM_RCACHE_MSHR_ST_H 12'hB89
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_RASTER_ISSUE_ST 12'hB0A // issue stalls
|
||||
`define CSR_MPM_RASTER_ISSUE_ST_H 12'hB8A
|
||||
|
||||
// Machine Performance-monitoring rop counters
|
||||
// PERF: rop unit
|
||||
|
@ -165,6 +174,9 @@
|
|||
`define CSR_MPM_OCACHE_BANK_ST_H 12'hB8B
|
||||
`define CSR_MPM_OCACHE_MSHR_ST 12'hB0C // MSHR stalls
|
||||
`define CSR_MPM_OCACHE_MSHR_ST_H 12'hB8C
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_ROP_ISSUE_ST 12'hB0D // issue stalls
|
||||
`define CSR_MPM_ROP_ISSUE_ST_H 12'hB8D
|
||||
|
||||
// Machine Information Registers
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ module VX_warp_sched #(
|
|||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [31:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire schedule_ready;
|
||||
wire warp_scheduled;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] cycles;
|
||||
|
@ -243,9 +244,7 @@ module VX_warp_sched #(
|
|||
|
||||
assign {schedule_tmask, schedule_pc} = schedule_data[schedule_wid];
|
||||
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
assign warp_scheduled = schedule_valid && ~stall_out;
|
||||
assign warp_scheduled = schedule_valid && schedule_ready;
|
||||
|
||||
`ifdef SIMULATION
|
||||
assign instr_uuid = (issued_instrs[schedule_wid] * `NUM_WARPS * `NUM_CORES * `NUM_CLUSTERS)
|
||||
|
@ -255,15 +254,18 @@ module VX_warp_sched #(
|
|||
assign instr_uuid = 0;
|
||||
`endif
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `UP(`UUID_BITS) + `NUM_THREADS + 32 + `UP(`NW_BITS)),
|
||||
.RESETW (1)
|
||||
VX_generic_buffer #(
|
||||
.DATAW (`UP(`UUID_BITS) + `NUM_THREADS + 32 + `UP(`NW_BITS)),
|
||||
.OUT_REG (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({schedule_valid, instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
.valid_in (schedule_valid),
|
||||
.ready_in (schedule_ready),
|
||||
.data_in ({instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}),
|
||||
.valid_out (ifetch_req_if.valid),
|
||||
.ready_out (ifetch_req_if.ready)
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
|
|
@ -303,9 +303,9 @@ module VX_mem_scheduler #(
|
|||
|
||||
// Handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
reg [QUEUE_SIZE-1:0][REQ_SIZEW-1:0] rsp_rem_size;
|
||||
wire [REQ_SIZEW-1:0] rsp_rem_size_n;
|
||||
wire [`UP(BATCH_SEL_BITS)-1:0] rsp_batch_idx;
|
||||
reg [REQ_SIZEW-1:0] rsp_rem_size [QUEUE_SIZE-1:0];
|
||||
wire [REQ_SIZEW-1:0] rsp_rem_size_n;
|
||||
wire [`UP(BATCH_SEL_BITS)-1:0] rsp_batch_idx;
|
||||
|
||||
// Select memory response
|
||||
VX_mem_rsp_sel #(
|
||||
|
@ -329,10 +329,18 @@ module VX_mem_scheduler #(
|
|||
);
|
||||
|
||||
wire [REQ_SIZEW-1:0] reqq_size;
|
||||
wire [NUM_BANKS-1:0] mem_rsp_mask_x;
|
||||
`POP_COUNT(reqq_size, reqq_mask);
|
||||
|
||||
wire [BANK_SIZEW-1:0] mem_rsp_size;
|
||||
`POP_COUNT(mem_rsp_size, mem_rsp_mask_s);
|
||||
if (NUM_BANKS > 1) begin
|
||||
`POP_COUNT(mem_rsp_size, mem_rsp_mask_s);
|
||||
assign mem_rsp_mask_x = mem_rsp_mask_s;
|
||||
end else begin
|
||||
assign mem_rsp_size = 1'b1;
|
||||
assign mem_rsp_mask_x = 1'b1;
|
||||
`UNUSED_VAR (mem_rsp_mask_s)
|
||||
end
|
||||
|
||||
if (NUM_BATCHES > 1) begin
|
||||
assign rsp_batch_idx = mem_rsp_tag_s[QUEUE_ADDRW +: BATCH_SEL_BITS];
|
||||
|
@ -363,49 +371,40 @@ module VX_mem_scheduler #(
|
|||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
|
||||
assign crsp_mask[i * NUM_BANKS +: SIZE] = {SIZE{(i == rsp_batch_idx)}} & mem_rsp_mask_s[SIZE-1:0];
|
||||
assign crsp_mask[i * NUM_BANKS +: SIZE] = {SIZE{(i == rsp_batch_idx)}} & mem_rsp_mask_x[SIZE-1:0];
|
||||
assign crsp_data[i * NUM_BANKS +: SIZE] = mem_rsp_data_s[SIZE-1:0];
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
reg [QUEUE_SIZE-1:0][NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store;
|
||||
reg [NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store_n;
|
||||
reg [NUM_BATCHES-1:0][NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_store [QUEUE_SIZE-1:0];
|
||||
reg [QUEUE_SIZE-1:0][NUM_REQS-1:0] rsp_orig_mask;
|
||||
wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] mem_rsp_data_m;
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s & rsp_complete;
|
||||
|
||||
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign mem_rsp_data_m[i] = {DATA_WIDTH{mem_rsp_mask_s[i]}} & mem_rsp_data_s[i];
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
rsp_store_n = rsp_store[ibuf_raddr];
|
||||
rsp_store_n[rsp_batch_idx] |= mem_rsp_data_m;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
|
||||
assign crsp_data[i * NUM_BANKS +: SIZE] = rsp_store_n[i][SIZE-1:0];
|
||||
assign mem_rsp_data_m[i] = {DATA_WIDTH{mem_rsp_mask_x[i]}} & mem_rsp_data_s[i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rsp_store <= '0;
|
||||
end else begin
|
||||
if (ibuf_push) begin
|
||||
rsp_store[ibuf_waddr] <= '0;
|
||||
rsp_orig_mask[ibuf_waddr] <= req_mask;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
rsp_store[ibuf_raddr] <= rsp_store_n;
|
||||
end
|
||||
if (ibuf_push) begin
|
||||
rsp_store[ibuf_waddr] <= '0;
|
||||
rsp_orig_mask[ibuf_waddr] <= req_mask;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
rsp_store[ibuf_raddr][rsp_batch_idx] <= mem_rsp_data_m;
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s & rsp_complete;
|
||||
|
||||
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
|
||||
|
||||
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
|
||||
localparam SIZE = ((i + 1) * NUM_BANKS > NUM_REQS) ? REM_BATCH_SIZE : NUM_BANKS;
|
||||
assign crsp_data[i * NUM_BANKS +: SIZE] = rsp_store[ibuf_raddr][i][SIZE-1:0]
|
||||
| ({(SIZE * DATA_WIDTH){(i == rsp_batch_idx)}} & mem_rsp_data_m[SIZE-1:0]);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -455,24 +454,28 @@ module VX_mem_scheduler #(
|
|||
`UNUSED_VAR (mem_req_dbg_uuid)
|
||||
`UNUSED_VAR (mem_rsp_dbg_uuid)
|
||||
|
||||
reg [QUEUE_SIZE-1:0][(`UP(`UUID_BITS) + TAG_ONLY_WIDTH + 64 + 1)-1:0] pending_reqs;
|
||||
reg [(`UP(`UUID_BITS) + TAG_ONLY_WIDTH + 64)-1:0] pending_reqs [QUEUE_SIZE-1:0];
|
||||
reg [QUEUE_SIZE-1:0] pending_req_valids;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pending_reqs <= '0;
|
||||
end begin
|
||||
pending_req_valids <= '0;
|
||||
end else begin
|
||||
if (ibuf_push) begin
|
||||
pending_reqs[ibuf_waddr] <= {req_dbg_uuid, req_tag_only, $time, 1'b1};
|
||||
pending_reqs[ibuf_waddr] <= {req_dbg_uuid, req_tag_only, $time};
|
||||
pending_req_valids[ibuf_waddr] <= 1'b1;
|
||||
end
|
||||
if (ibuf_pop) begin
|
||||
pending_reqs[ibuf_raddr] <= '0;
|
||||
pending_req_valids[ibuf_raddr] <= 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
for (integer i = 0; i < QUEUE_SIZE; ++i) begin
|
||||
if (pending_reqs[i][0]) begin
|
||||
`ASSERT(($time - pending_reqs[i][1 +: 64]) < `STALL_TIMEOUT,
|
||||
if (pending_req_valids[i]) begin
|
||||
`ASSERT(($time - pending_reqs[i][0 +: 64]) < `STALL_TIMEOUT,
|
||||
("%t: *** %s response timeout: remaining=%0d, tag=0x%0h (#%0d)",
|
||||
$time, INSTANCE_ID, rsp_rem_size[i], pending_reqs[i][1+64 +: TAG_ONLY_WIDTH], pending_reqs[i][1+64+TAG_ONLY_WIDTH +: `UP(`UUID_BITS)]));
|
||||
$time, INSTANCE_ID, rsp_rem_size[i], pending_reqs[i][64 +: TAG_ONLY_WIDTH], pending_reqs[i][64+TAG_ONLY_WIDTH +: `UP(`UUID_BITS)]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,7 +10,94 @@ module VX_onehot_mux #(
|
|||
input wire [N-1:0] sel_in,
|
||||
output wire [DATAW-1:0] data_out
|
||||
);
|
||||
if (N > 1) begin
|
||||
if (N == 1) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end else if (N == 2) begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = sel_in[0] ? data_in[0] : data_in[1];
|
||||
end else if (N == 3) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
3'b001: data_out_r = data_in[0];
|
||||
3'b010: data_out_r = data_in[1];
|
||||
3'b100: data_out_r = data_in[2];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 4) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
4'b0001: data_out_r = data_in[0];
|
||||
4'b0010: data_out_r = data_in[1];
|
||||
4'b0100: data_out_r = data_in[2];
|
||||
4'b1000: data_out_r = data_in[3];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 5) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
5'b00001: data_out_r = data_in[0];
|
||||
5'b00010: data_out_r = data_in[1];
|
||||
5'b00100: data_out_r = data_in[2];
|
||||
5'b01000: data_out_r = data_in[3];
|
||||
5'b10000: data_out_r = data_in[4];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 6) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
6'b000001: data_out_r = data_in[0];
|
||||
6'b000010: data_out_r = data_in[1];
|
||||
6'b000100: data_out_r = data_in[2];
|
||||
6'b001000: data_out_r = data_in[3];
|
||||
6'b010000: data_out_r = data_in[4];
|
||||
6'b100000: data_out_r = data_in[5];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 7) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
7'b0000001: data_out_r = data_in[0];
|
||||
7'b0000010: data_out_r = data_in[1];
|
||||
7'b0000100: data_out_r = data_in[2];
|
||||
7'b0001000: data_out_r = data_in[3];
|
||||
7'b0010000: data_out_r = data_in[4];
|
||||
7'b0100000: data_out_r = data_in[5];
|
||||
7'b1000000: data_out_r = data_in[6];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else if (N == 8) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
case (sel_in)
|
||||
8'b00000001: data_out_r = data_in[0];
|
||||
8'b00000010: data_out_r = data_in[1];
|
||||
8'b00000100: data_out_r = data_in[2];
|
||||
8'b00001000: data_out_r = data_in[3];
|
||||
8'b00010000: data_out_r = data_in[4];
|
||||
8'b00100000: data_out_r = data_in[5];
|
||||
8'b01000000: data_out_r = data_in[6];
|
||||
8'b10000000: data_out_r = data_in[7];
|
||||
default: data_out_r = 'x;
|
||||
endcase
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
end else begin
|
||||
if (MODEL == 1) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
|
@ -21,7 +108,7 @@ module VX_onehot_mux #(
|
|||
end
|
||||
end
|
||||
end
|
||||
assign data_out = data_out_r;
|
||||
assign data_out = data_out_r;
|
||||
end else if (MODEL == 2) begin
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(*) begin
|
||||
|
@ -44,9 +131,6 @@ module VX_onehot_mux #(
|
|||
assign data_out[i] = (| gather);
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -114,7 +114,7 @@ module VX_raster_mem #(
|
|||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
mem_req_valid <= 0;
|
||||
end begin
|
||||
end else begin
|
||||
// deassert memory request when fired
|
||||
if (mem_req_fire) begin
|
||||
mem_req_valid <= 0;
|
||||
|
|
|
@ -75,7 +75,7 @@ module VX_raster_te #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
tile_valid <= 0;
|
||||
end begin
|
||||
end else begin
|
||||
if (~stall) begin
|
||||
tile_valid <= 0;
|
||||
if (fifo_arb_valid) begin
|
||||
|
|
|
@ -272,7 +272,7 @@ module VX_raster_unit #(
|
|||
if (reset) begin
|
||||
perf_pending_reads <= 0;
|
||||
end else begin
|
||||
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'(perf_pending_reads_cycle);
|
||||
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'($signed(perf_pending_reads_cycle));
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -266,40 +266,39 @@ module VX_rop_unit #(
|
|||
.full (pending_reads_full),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
wire mem_req_stall = mem_req_valid_r & ~mem_req_ready_r;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + NUM_LANES * (1 + 1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + `ROP_STENCIL_BITS + 1) + MEM_TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) mem_req_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~mem_req_stall),
|
||||
.data_in ({mem_req_valid, mem_req_rw, mem_req_mask, mem_req_ds_pass, mem_req_pos_x, mem_req_pos_y, mem_req_color, mem_req_depth, mem_req_stencil, mem_req_face, mem_req_tag}),
|
||||
.data_out ({mem_req_valid_r, mem_req_rw_r, mem_req_mask_r, mem_req_ds_pass_r, mem_req_pos_x_r, mem_req_pos_y_r, mem_req_color_r, mem_req_depth_r, mem_req_stencil_r, mem_req_face_r, mem_req_tag_r})
|
||||
);
|
||||
|
||||
assign mem_req_ready = ~mem_req_stall;
|
||||
VX_generic_buffer #(
|
||||
.DATAW (1 + NUM_LANES * (1 + 1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + `ROP_STENCIL_BITS + 1) + MEM_TAG_WIDTH),
|
||||
.OUT_REG (1)
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid),
|
||||
.ready_in (mem_req_ready),
|
||||
.data_in ({mem_req_rw, mem_req_mask, mem_req_ds_pass, mem_req_pos_x, mem_req_pos_y, mem_req_color, mem_req_depth, mem_req_stencil, mem_req_face, mem_req_tag}),
|
||||
.data_out ({mem_req_rw_r, mem_req_mask_r, mem_req_ds_pass_r, mem_req_pos_x_r, mem_req_pos_y_r, mem_req_color_r, mem_req_depth_r, mem_req_stencil_r, mem_req_face_r, mem_req_tag_r}),
|
||||
.valid_out (mem_req_valid_r),
|
||||
.ready_out (mem_req_ready_r)
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rd_req_per_cycle;
|
||||
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_wr_req_per_cycle;
|
||||
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rsp_per_cycle;
|
||||
wire [$clog2(OCACHE_NUM_REQS+1)-1:0] perf_mem_rd_rsp_per_cycle;
|
||||
wire [$clog2(OCACHE_NUM_REQS+1)+1-1:0] perf_pending_reads_cycle;
|
||||
|
||||
wire [OCACHE_NUM_REQS-1:0] perf_mem_rd_req_per_mask = cache_req_if.valid & ~cache_req_if.rw & cache_req_if.ready;
|
||||
wire [OCACHE_NUM_REQS-1:0] perf_mem_wr_req_per_mask = cache_req_if.valid & cache_req_if.rw & cache_req_if.ready;
|
||||
wire [OCACHE_NUM_REQS-1:0] perf_mem_rsp_per_mask = cache_rsp_if.valid & cache_rsp_if.ready;
|
||||
wire [OCACHE_NUM_REQS-1:0] perf_mem_rd_rsp_per_mask = cache_rsp_if.valid & cache_rsp_if.ready;
|
||||
|
||||
`POP_COUNT(perf_mem_rd_req_per_cycle, perf_mem_rd_req_per_mask);
|
||||
`POP_COUNT(perf_mem_wr_req_per_cycle, perf_mem_wr_req_per_mask);
|
||||
`POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_mask);
|
||||
`POP_COUNT(perf_mem_rd_rsp_per_cycle, perf_mem_rd_rsp_per_mask);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_pending_reads;
|
||||
assign perf_pending_reads_cycle = perf_mem_rd_req_per_cycle - perf_mem_rsp_per_cycle;
|
||||
assign perf_pending_reads_cycle = perf_mem_rd_req_per_cycle - perf_mem_rd_rsp_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
|
@ -170,29 +170,29 @@ module VX_tex_mem #(
|
|||
|
||||
always @(*) begin
|
||||
case (mem_rsp_lgstride)
|
||||
0: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[7:0]);
|
||||
1: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[15:0]);
|
||||
default: mem_rsp_data_qual[i][j] = rsp_data_shifted;
|
||||
0: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[7:0]);
|
||||
1: mem_rsp_data_qual[i][j] = 32'(rsp_data_shifted[15:0]);
|
||||
2: mem_rsp_data_qual[i][j] = rsp_data_shifted;
|
||||
default: mem_rsp_data_qual[i][j] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + REQ_INFOW + (4 * NUM_LANES * 32)),
|
||||
.RESETW (1)
|
||||
VX_generic_buffer #(
|
||||
.DATAW (REQ_INFOW + (4 * NUM_LANES * 32)),
|
||||
.OUT_REG (1)
|
||||
) rsp_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({mem_rsp_valid, mem_rsp_info, mem_rsp_data_qual}),
|
||||
.data_out ({rsp_valid, rsp_info, rsp_data})
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({mem_rsp_info, mem_rsp_data_qual}),
|
||||
.data_out ({rsp_info, rsp_data}),
|
||||
.valid_out (rsp_valid),
|
||||
.ready_out (rsp_ready)
|
||||
);
|
||||
|
||||
assign mem_rsp_ready = ~stall_out;
|
||||
|
||||
`ifdef DBG_TRACE_TEX
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -14,9 +14,9 @@ module VX_tex_stride (
|
|||
`TEX_FORMAT_A1R5G5B5,
|
||||
`TEX_FORMAT_A4R4G4B4,
|
||||
`TEX_FORMAT_A8L8: log_stride_r = 1;
|
||||
// `TEX_FORMAT_L8:
|
||||
// `TEX_FORMAT_A8:
|
||||
default: log_stride_r = 0;
|
||||
`TEX_FORMAT_L8,
|
||||
`TEX_FORMAT_A8: log_stride_r = 0;
|
||||
default: log_stride_r = 'x;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
|
|
@ -45,11 +45,11 @@ module VX_tex_unit #(
|
|||
|
||||
wire req_valid;
|
||||
wire [NUM_LANES-1:0] req_mask;
|
||||
logic [`TEX_FILTER_BITS-1:0] req_filter;
|
||||
logic [`TEX_FORMAT_BITS-1:0] req_format;
|
||||
logic [1:0][`TEX_WRAP_BITS-1:0] req_wraps;
|
||||
wire [`TEX_FILTER_BITS-1:0] req_filter;
|
||||
wire [`TEX_FORMAT_BITS-1:0] req_format;
|
||||
wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps;
|
||||
wire [1:0][`TEX_LOD_BITS-1:0] req_logdims;
|
||||
logic [`TEX_ADDR_BITS-1:0] req_baseaddr;
|
||||
wire [`TEX_ADDR_BITS-1:0] req_baseaddr;
|
||||
wire [1:0][NUM_LANES-1:0][31:0] req_coords;
|
||||
wire [NUM_LANES-1:0][`TEX_LOD_BITS-1:0] req_miplevel, sel_miplevel;
|
||||
wire [NUM_LANES-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff, sel_mipoff;
|
||||
|
@ -61,22 +61,20 @@ module VX_tex_unit #(
|
|||
assign sel_mipoff[i] = tex_dcrs.mipoff[sel_miplevel[i]];
|
||||
end
|
||||
|
||||
wire stall_in = req_valid && ~req_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_LANES + `TEX_FILTER_BITS + `TEX_FORMAT_BITS + 2 * `TEX_WRAP_BITS + 2 * `TEX_LOD_BITS + `TEX_ADDR_BITS + NUM_LANES * (2 * 32 + `TEX_LOD_BITS + `TEX_MIPOFF_BITS) + TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_in),
|
||||
.data_in ({tex_req_if.valid, tex_req_if.mask, tex_dcrs.filter, tex_dcrs.format, tex_dcrs.wraps, tex_dcrs.logdims, tex_dcrs.baseaddr, tex_req_if.coords, sel_miplevel, sel_mipoff, tex_req_if.tag}),
|
||||
.data_out ({req_valid, req_mask, req_filter, req_format, req_wraps, req_logdims, req_baseaddr, req_coords, req_miplevel, req_mipoff, req_tag})
|
||||
VX_generic_buffer #(
|
||||
.DATAW (NUM_LANES + `TEX_FILTER_BITS + `TEX_FORMAT_BITS + 2 * `TEX_WRAP_BITS + 2 * `TEX_LOD_BITS + `TEX_ADDR_BITS + NUM_LANES * (2 * 32 + `TEX_LOD_BITS + `TEX_MIPOFF_BITS) + TAG_WIDTH),
|
||||
.OUT_REG (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (tex_req_if.valid),
|
||||
.ready_in (tex_req_if.ready),
|
||||
.data_in ({tex_req_if.mask, tex_dcrs.filter, tex_dcrs.format, tex_dcrs.wraps, tex_dcrs.logdims, tex_dcrs.baseaddr, tex_req_if.coords, sel_miplevel, sel_mipoff, tex_req_if.tag}),
|
||||
.data_out ({req_mask, req_filter, req_format, req_wraps, req_logdims, req_baseaddr, req_coords, req_miplevel, req_mipoff, req_tag}),
|
||||
.valid_out (req_valid),
|
||||
.ready_out (req_ready)
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
assign tex_req_if.ready = ~stall_in;
|
||||
|
||||
// address generation
|
||||
|
||||
wire mem_req_valid;
|
||||
|
@ -239,7 +237,7 @@ module VX_tex_unit #(
|
|||
end
|
||||
|
||||
assign perf_tex_if.mem_reads = perf_mem_reads;
|
||||
assign perf_tex_if.mem_latency = perf_pending_reads;
|
||||
assign perf_tex_if.mem_latency = perf_mem_latency;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_TEX
|
||||
|
|
|
@ -29,9 +29,18 @@ DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
|||
|
||||
CONFIGS += -DEXT_GFX_ENABLE
|
||||
|
||||
#CONFIGS += -DNUM_ROP_UNITS=2
|
||||
#CONFIGS += -DNUM_TEX_UNITS=4
|
||||
#CONFIGS += -DNUM_RASTER_UNITS=2
|
||||
#CONFIGS += -DTCACHE_NUM_BANKS=1
|
||||
#CONFIGS += -DOCACHE_NUM_BANKS=1
|
||||
#CONFIGS += -DOCACHE_NUM_BANKS=1
|
||||
|
||||
#CONFIGS += -DL1_DISABLE
|
||||
#CONFIGS += -DSM_DISABLE
|
||||
#CONFIGS += -DRCACHE_DISABLE -DOCACHE_DISABLE -DTCACHE_DISABLE
|
||||
#CONFIGS += -DRCACHE_DISABLE
|
||||
#CONFIGS += -DOCACHE_DISABLE
|
||||
#CONFIGS += -DTCACHE_DISABLE
|
||||
|
||||
ifeq ($(DEVICE_FAMILY), stratix10)
|
||||
CONFIGS += -DALTERA_S10
|
||||
|
|
|
@ -3,5 +3,4 @@
|
|||
# FPGA programming
|
||||
# first argument is the bitstream
|
||||
|
||||
echo "fpgaconf --bus 0xaf $1"
|
||||
fpgaconf --bus 0xaf $1
|
|
@ -21,6 +21,7 @@ set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
|||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
|
||||
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
|
@ -28,6 +29,7 @@ set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
|||
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_DUPLICATION ON
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
|
|
|
@ -1,68 +1,70 @@
|
|||
BUILD_DIR ?= build
|
||||
|
||||
BUILD_DIR_X=$(BUILD_DIR)_$(DEVICE_FAMILY)
|
||||
|
||||
.PHONY: dogfood unittest pipeline smem cache fpu_core core vortex vortex-gfx top top-gfx texunit test
|
||||
|
||||
dogfood:
|
||||
mkdir -p dogfood/$(BUILD_DIR)
|
||||
cp dogfood/Makefile dogfood/$(BUILD_DIR)
|
||||
$(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p dogfood/$(BUILD_DIR_X)
|
||||
cp dogfood/Makefile dogfood/$(BUILD_DIR_X)
|
||||
$(MAKE) -C dogfood/$(BUILD_DIR_X) clean && $(MAKE) -C dogfood/$(BUILD_DIR_X) > dogfood/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
unittest:
|
||||
mkdir -p unittest/$(BUILD_DIR)
|
||||
cp unittest/Makefile unittest/$(BUILD_DIR)
|
||||
$(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p unittest/$(BUILD_DIR_X)
|
||||
cp unittest/Makefile unittest/$(BUILD_DIR_X)
|
||||
$(MAKE) -C unittest/$(BUILD_DIR_X) clean && $(MAKE) -C unittest/$(BUILD_DIR_X) > unittest/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
pipeline:
|
||||
mkdir -p pipeline/$(BUILD_DIR)
|
||||
cp pipeline/Makefile pipeline/$(BUILD_DIR)
|
||||
$(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p pipeline/$(BUILD_DIR_X)
|
||||
cp pipeline/Makefile pipeline/$(BUILD_DIR_X)
|
||||
$(MAKE) -C pipeline/$(BUILD_DIR_X) clean && $(MAKE) -C pipeline/$(BUILD_DIR_X) > pipeline/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
smem:
|
||||
mkdir -p smem/$(BUILD_DIR)
|
||||
cp smem/Makefile smem/$(BUILD_DIR)
|
||||
$(MAKE) -C smem/$(BUILD_DIR) clean && $(MAKE) -C smem/$(BUILD_DIR) > smem/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p smem/$(BUILD_DIR_X)
|
||||
cp smem/Makefile smem/$(BUILD_DIR_X)
|
||||
$(MAKE) -C smem/$(BUILD_DIR_X) clean && $(MAKE) -C smem/$(BUILD_DIR_X) > smem/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
cache:
|
||||
mkdir -p cache/$(BUILD_DIR)
|
||||
cp cache/Makefile cache/$(BUILD_DIR)
|
||||
$(MAKE) -C cache/$(BUILD_DIR) clean && $(MAKE) -C cache/$(BUILD_DIR) > cache/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p cache/$(BUILD_DIR_X)
|
||||
cp cache/Makefile cache/$(BUILD_DIR_X)
|
||||
$(MAKE) -C cache/$(BUILD_DIR_X) clean && $(MAKE) -C cache/$(BUILD_DIR_X) > cache/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
fpu_core:
|
||||
mkdir -p fpu_core/$(BUILD_DIR)
|
||||
cp fpu_core/Makefile fpu_core/$(BUILD_DIR)
|
||||
$(MAKE) -C fpu_core/$(BUILD_DIR) clean && $(MAKE) -C fpu_core/$(BUILD_DIR) > fpu_core/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p fpu_core/$(BUILD_DIR_X)
|
||||
cp fpu_core/Makefile fpu_core/$(BUILD_DIR_X)
|
||||
$(MAKE) -C fpu_core/$(BUILD_DIR_X) clean && $(MAKE) -C fpu_core/$(BUILD_DIR_X) > fpu_core/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
core:
|
||||
mkdir -p core/$(BUILD_DIR)
|
||||
cp core/Makefile core/$(BUILD_DIR)
|
||||
$(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p core/$(BUILD_DIR_X)
|
||||
cp core/Makefile core/$(BUILD_DIR_X)
|
||||
$(MAKE) -C core/$(BUILD_DIR_X) clean && $(MAKE) -C core/$(BUILD_DIR_X) > core/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
vortex:
|
||||
mkdir -p vortex/$(BUILD_DIR)
|
||||
cp vortex/Makefile vortex/$(BUILD_DIR)
|
||||
$(MAKE) -C vortex/$(BUILD_DIR) clean && $(MAKE) -C vortex/$(BUILD_DIR) > vortex/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p vortex/$(BUILD_DIR_X)
|
||||
cp vortex/Makefile vortex/$(BUILD_DIR_X)
|
||||
$(MAKE) -C vortex/$(BUILD_DIR_X) clean && $(MAKE) -C vortex/$(BUILD_DIR_X) > vortex/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
vortex-gfx:
|
||||
mkdir -p vortex-gfx/$(BUILD_DIR)
|
||||
cp vortex-gfx/Makefile vortex-gfx/$(BUILD_DIR)
|
||||
$(MAKE) -C vortex-gfx/$(BUILD_DIR) clean && $(MAKE) -C vortex-gfx/$(BUILD_DIR) > vortex-gfx/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p vortex-gfx/$(BUILD_DIR_X)
|
||||
cp vortex-gfx/Makefile vortex-gfx/$(BUILD_DIR_X)
|
||||
$(MAKE) -C vortex-gfx/$(BUILD_DIR_X) clean && $(MAKE) -C vortex-gfx/$(BUILD_DIR_X) > vortex-gfx/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
top:
|
||||
mkdir -p top/$(BUILD_DIR)
|
||||
cp top/Makefile top/$(BUILD_DIR)
|
||||
$(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p top/$(BUILD_DIR_X)
|
||||
cp top/Makefile top/$(BUILD_DIR_X)
|
||||
$(MAKE) -C top/$(BUILD_DIR_X) clean && $(MAKE) -C top/$(BUILD_DIR_X) > top/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
top-gfx:
|
||||
mkdir -p top-gfx/$(BUILD_DIR)
|
||||
cp top-gfx/Makefile top-gfx/$(BUILD_DIR)
|
||||
$(MAKE) -C top-gfx/$(BUILD_DIR) clean && $(MAKE) -C top-gfx/$(BUILD_DIR) > top-gfx/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p top-gfx/$(BUILD_DIR_X)
|
||||
cp top-gfx/Makefile top-gfx/$(BUILD_DIR_X)
|
||||
$(MAKE) -C top-gfx/$(BUILD_DIR_X) clean && $(MAKE) -C top-gfx/$(BUILD_DIR_X) > top-gfx/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
texunit:
|
||||
mkdir -p texunit/$(BUILD_DIR)
|
||||
cp texunit/Makefile texunit/$(BUILD_DIR)
|
||||
$(MAKE) -C texunit/$(BUILD_DIR) clean && $(MAKE) -C texunit/$(BUILD_DIR) > texunit/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p texunit/$(BUILD_DIR_X)
|
||||
cp texunit/Makefile texunit/$(BUILD_DIR_X)
|
||||
$(MAKE) -C texunit/$(BUILD_DIR_X) clean && $(MAKE) -C texunit/$(BUILD_DIR_X) > texunit/$(BUILD_DIR_X)/build.log 2>&1 &
|
||||
|
||||
test:
|
||||
mkdir -p test/$(BUILD_DIR)
|
||||
cp test/Makefile test/$(BUILD_DIR)
|
||||
$(MAKE) -C test/$(BUILD_DIR) clean && $(MAKE) -C test/$(BUILD_DIR) > test/$(BUILD_DIR)/build.log 2>&1 &
|
||||
mkdir -p test/$(BUILD_DIR_X)
|
||||
cp test/Makefile test/$(BUILD_DIR_X)
|
||||
$(MAKE) -C test/$(BUILD_DIR_X) clean && $(MAKE) -C test/$(BUILD_DIR_X) > test/$(BUILD_DIR_X)/build.log 2>&1 &
|
73
hw/syn/altera/quartus/cache/Makefile
vendored
73
hw/syn/altera/quartus/cache/Makefile
vendored
|
@ -1,72 +1,7 @@
|
|||
PROJECT = VX_cache
|
||||
TOP_LEVEL_ENTITY = VX_cache
|
||||
SRC_FILE = VX_cache.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
PROJECT = VX_cache_syn
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
include ../../common.mk
|
||||
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,81 +1,9 @@
|
|||
PROJECT = Core
|
||||
TOP_LEVEL_ENTITY = VX_core
|
||||
SRC_FILE = VX_core.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
PROJECT = VX_core_syn
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,87 +1,8 @@
|
|||
PROJECT = VX_fpu_fpga
|
||||
TOP_LEVEL_ENTITY = VX_fpu_fpga
|
||||
SRC_FILE = VX_fpu_fpga.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
RTL_INCLUDE = $(FPU_INCLUDE);$(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(IP_DIR)
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Part, Family
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
|
||||
# -set "FPU_CVT2"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
timing: $(PROJECT_FILES)
|
||||
quartus_sh -t ../../timing-html.tcl -project $(PROJECT)
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,72 +1,7 @@
|
|||
PROJECT = VX_shared_mem
|
||||
TOP_LEVEL_ENTITY = VX_shared_mem
|
||||
SRC_FILE = VX_shared_mem.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
include ../../common.mk
|
||||
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,83 +1,11 @@
|
|||
PROJECT = Vortex
|
||||
TOP_LEVEL_ENTITY = Vortex
|
||||
SRC_FILE = Vortex.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
include ../../common.mk
|
||||
|
||||
CONFIGS += -set "EXT_GFX_ENABLE"
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
GFX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(GFX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,81 +1,9 @@
|
|||
PROJECT = Core
|
||||
TOP_LEVEL_ENTITY = VX_core
|
||||
SRC_FILE = VX_core.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
PROJECT = VX_tex_unit_syn
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "EXT_TEX_ENABLE"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,23 +1,16 @@
|
|||
PROJECT = vortex_afu
|
||||
TOP_LEVEL_ENTITY = vortex_afu
|
||||
SRC_FILE = vortex_afu.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
AFU_DIR = ../../../../../afu/opae
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
CONFIGS += -set "NOPAE"
|
||||
CONFIGS += -set "EXT_GFX_ENABLE"
|
||||
|
||||
CONFIGS += -set "NUM_CORES=4"
|
||||
|
||||
CONFIGS += -set "L2_ENABLE"
|
||||
|
||||
#CONFIGS += -set "L1_DISABLE"
|
||||
#CONFIGS += -set "SM_DISABLE"
|
||||
#CONFIGS += -set "RCACHE_DISABLE" -set "OCACHE_DISABLE" -set "TCACHE_DISABLE"
|
||||
|
@ -25,67 +18,3 @@ CONFIGS += -set "NUM_CORES=4"
|
|||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(AFU_DIR);$(AFU_DIR)/ccip;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,89 +1,18 @@
|
|||
PROJECT = vortex_afu
|
||||
TOP_LEVEL_ENTITY = vortex_afu
|
||||
SRC_FILE = vortex_afu.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
AFU_DIR = ../../../../../afu/opae
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
CONFIGS += -set "NOPAE"
|
||||
|
||||
CONFIGS += -set "NUM_CORES=4"
|
||||
|
||||
CONFIGS += -set "L2_ENABLE"
|
||||
|
||||
#CONFIGS += -set "L1_DISABLE"
|
||||
#CONFIGS += -set "SM_DISABLE"
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(AFU_DIR);$(AFU_DIR)/ccip;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,81 +1,11 @@
|
|||
PROJECT = Unittest
|
||||
TOP_LEVEL_ENTITY = VX_req_dispatch
|
||||
SRC_FILE = VX_req_dispatch.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
|
@ -1,16 +1,8 @@
|
|||
PROJECT = Vortex
|
||||
TOP_LEVEL_ENTITY = Vortex
|
||||
SRC_FILE = Vortex.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
include ../../common.mk
|
||||
|
||||
CONFIGS += -set "EXT_GFX_ENABLE"
|
||||
|
||||
|
@ -24,72 +16,10 @@ CONFIGS += -set "EXT_GFX_ENABLE"
|
|||
|
||||
#CONFIGS += -set "NUM_WARPS=2" -set "NUM_THREADS=2"
|
||||
|
||||
CONFIGS += -set "NUM_CORES=4"
|
||||
CONFIGS += -set "NUM_CORES=2"
|
||||
|
||||
CONFIGS += -set "L2_ENABLE"
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
GFX_INCLUDE = $(RTL_DIR)/tex_unit;$(RTL_DIR)/raster_unit;$(RTL_DIR)/rop_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(GFX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,16 +1,12 @@
|
|||
PROJECT = Vortex
|
||||
TOP_LEVEL_ENTITY = Vortex
|
||||
SRC_FILE = Vortex.sv
|
||||
RTL_DIR = ../../../../../rtl
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
IP_DIR = ../../../ip/arria10
|
||||
include ../../common.mk
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#IP_DIR = ../../../ip/stratix10
|
||||
CONFIGS += -set "NUM_CORES=2"
|
||||
|
||||
CONFIGS += -set "L2_ENABLE"
|
||||
|
||||
#CONFIGS += -set "L1_DISABLE"
|
||||
|
||||
|
@ -20,72 +16,6 @@ IP_DIR = ../../../ip/arria10
|
|||
|
||||
#CONFIGS += -set "NUM_WARPS=2" -set "NUM_THREADS=2"
|
||||
|
||||
CONFIGS += -set "NUM_CORES=4"
|
||||
|
||||
FPU_INCLUDE = $(RTL_DIR)/fpu_unit;$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src
|
||||
TEX_INCLUDE = $(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(IP_DIR);$(FPU_INCLUDE);$(TEX_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" $(CONFIGS)
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -202,6 +202,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t csr_stalls = 0;
|
||||
uint64_t alu_stalls = 0;
|
||||
uint64_t gpu_stalls = 0;
|
||||
uint64_t wctl_issue_stalls = 0;
|
||||
// PERF: decode
|
||||
uint64_t loads = 0;
|
||||
uint64_t stores = 0;
|
||||
|
@ -228,6 +229,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// PERF: texunit
|
||||
uint64_t tex_mem_reads = 0;
|
||||
uint64_t tex_mem_lat = 0;
|
||||
// PERF: tex issue
|
||||
uint64_t tex_issue_stalls = 0;
|
||||
// PERF: tex tcache
|
||||
uint64_t tcache_reads = 0;
|
||||
uint64_t tcache_read_misses = 0;
|
||||
|
@ -238,6 +241,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t raster_mem_reads = 0;
|
||||
uint64_t raster_mem_lat = 0;
|
||||
uint64_t raster_stall_cycles = 0;
|
||||
// PERF: raster issue
|
||||
uint64_t raster_issue_stalls = 0;
|
||||
// PERF: raster cache
|
||||
uint64_t rcache_reads = 0;
|
||||
uint64_t rcache_read_misses = 0;
|
||||
|
@ -249,6 +254,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t rop_mem_writes = 0;
|
||||
uint64_t rop_mem_lat = 0;
|
||||
uint64_t rop_stall_cycles = 0;
|
||||
// PERF: rop issue
|
||||
uint64_t rop_issue_stalls = 0;
|
||||
// PERF: rop ocache
|
||||
uint64_t ocache_reads = 0;
|
||||
uint64_t ocache_writes = 0;
|
||||
|
@ -319,7 +326,11 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// gpu_stall
|
||||
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
// wctl_stall
|
||||
uint64_t wctl_issue_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_WCTL_ISSUE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: wctl issue stalls=%ld\n", core_id, wctl_issue_stalls_per_core);
|
||||
wctl_issue_stalls += wctl_issue_stalls_per_core;
|
||||
|
||||
// PERF: decode
|
||||
// loads
|
||||
|
@ -403,6 +414,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
tcache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_TCACHE_BANK_ST);
|
||||
tcache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_TCACHE_MSHR_ST);
|
||||
}
|
||||
// issue_stall
|
||||
uint64_t issue_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_TEX_ISSUE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: tex issue stalls=%ld\n", core_id, issue_stalls_per_core);
|
||||
tex_issue_stalls += issue_stalls_per_core;
|
||||
#endif
|
||||
} break;
|
||||
case DCR_MPM_CLASS_RASTER: {
|
||||
|
@ -417,6 +432,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
rcache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_RCACHE_BANK_ST);
|
||||
rcache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_RCACHE_MSHR_ST);
|
||||
}
|
||||
// issue_stall
|
||||
uint64_t raster_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_RASTER_ISSUE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: raster issue stalls=%ld\n", core_id, raster_stalls_per_core);
|
||||
raster_issue_stalls += raster_stalls_per_core;
|
||||
#endif
|
||||
} break;
|
||||
case DCR_MPM_CLASS_ROP: {
|
||||
|
@ -434,6 +453,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
ocache_bank_stalls = get_csr_64(staging_ptr, CSR_MPM_OCACHE_BANK_ST);
|
||||
ocache_mshr_stalls = get_csr_64(staging_ptr, CSR_MPM_OCACHE_MSHR_ST);
|
||||
}
|
||||
// issue_stall
|
||||
uint64_t rop_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ROP_ISSUE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: rop issue stalls=%ld\n", core_id, rop_stalls_per_core);
|
||||
rop_issue_stalls += rop_stalls_per_core;
|
||||
#endif
|
||||
} break;
|
||||
default:
|
||||
|
@ -461,6 +484,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
|
||||
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: wctl issue stalls=%ld\n", wctl_issue_stalls);
|
||||
fprintf(stream, "PERF: loads=%ld\n", loads);
|
||||
fprintf(stream, "PERF: stores=%ld\n", stores);
|
||||
fprintf(stream, "PERF: branches=%ld\n", branches);
|
||||
|
@ -483,6 +507,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
int tex_avg_lat = (int)(double(tex_mem_lat) / double(tex_mem_reads));
|
||||
fprintf(stream, "PERF: tex memory reads=%ld\n", tex_mem_reads);
|
||||
fprintf(stream, "PERF: tex memory average latency=%d cycles\n", tex_avg_lat);
|
||||
fprintf(stream, "PERF: tex issue stalls=%ld\n", tex_issue_stalls);
|
||||
int tcache_read_hit_ratio = (int)((1.0 - (double(tcache_read_misses) / double(tcache_reads))) * 100);
|
||||
int tcache_bank_utilization = (int)((double(tcache_reads) / double(tcache_reads + tcache_bank_stalls)) * 100);
|
||||
fprintf(stream, "PERF: tcache reads=%ld\n", tcache_reads);
|
||||
|
@ -498,6 +523,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: raster memory reads=%ld\n", raster_mem_reads);
|
||||
fprintf(stream, "PERF: raster memory latency=%d cycles\n", raster_mem_avg_lat);
|
||||
fprintf(stream, "PERF: raster stall cycles=%ld cycles (%d%%)\n", raster_stall_cycles, raster_stall_cycles_ratio);
|
||||
fprintf(stream, "PERF: raster issue stalls=%ld\n", raster_issue_stalls);
|
||||
// cache perf counters
|
||||
int rcache_read_hit_ratio = (int)((1.0 - (double(rcache_read_misses) / double(rcache_reads))) * 100);
|
||||
int rcache_bank_utilization = (int)((double(rcache_reads) / double(rcache_reads + rcache_bank_stalls)) * 100);
|
||||
|
@ -515,6 +541,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: rop memory writes=%ld\n", rop_mem_writes);
|
||||
fprintf(stream, "PERF: rop memory average latency=%d cycles\n", rop_mem_avg_lat);
|
||||
fprintf(stream, "PERF: rop stall cycles=%ld cycles (%d%%)\n", rop_stall_cycles, rop_stall_cycles_ratio);
|
||||
fprintf(stream, "PERF: rop issue stalls=%ld\n", rop_issue_stalls);
|
||||
// cache perf counters
|
||||
int ocache_read_hit_ratio = (int)((1.0 - (double(ocache_read_misses) / double(ocache_reads))) * 100);
|
||||
int ocache_write_hit_ratio = (int)((1.0 - (double(ocache_write_misses) / double(ocache_writes))) * 100);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue