mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
perf counters generic size
This commit is contained in:
parent
a60361ac2d
commit
d808aa2735
10 changed files with 123 additions and 118 deletions
|
@ -114,61 +114,61 @@ module VX_csr_data #(
|
|||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[43:32]);
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[43:32]);
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[43:32]);
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[43:32]);
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[43:32]);
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[43:32]);
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[43:32]);
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[43:32]);
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[43:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[43:32]);
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[43:32]);
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[43:32]);
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[43:32]);
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[43:32]);
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[43:32]);
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[43:32]);
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[43:32]);
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[43:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: DRAM
|
||||
`CSR_MPM_DRAM_READS : read_data_r = perf_memsys_if.dram_reads[31:0];
|
||||
`CSR_MPM_DRAM_READS_H : read_data_r = 32'(perf_memsys_if.dram_reads[43:32]);
|
||||
`CSR_MPM_DRAM_READS_H : read_data_r = 32'(perf_memsys_if.dram_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DRAM_WRITES : read_data_r = perf_memsys_if.dram_writes[31:0];
|
||||
`CSR_MPM_DRAM_WRITES_H : read_data_r = 32'(perf_memsys_if.dram_writes[43:32]);
|
||||
`CSR_MPM_DRAM_WRITES_H : read_data_r = 32'(perf_memsys_if.dram_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DRAM_ST : read_data_r = perf_memsys_if.dram_stalls[31:0];
|
||||
`CSR_MPM_DRAM_ST_H : read_data_r = 32'(perf_memsys_if.dram_stalls[43:32]);
|
||||
`CSR_MPM_DRAM_ST_H : read_data_r = 32'(perf_memsys_if.dram_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = 32'(perf_memsys_if.dram_latency[43:32]);
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = 32'(perf_memsys_if.dram_latency[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
|
@ -186,9 +186,9 @@ module VX_csr_data #(
|
|||
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[43:32]);
|
||||
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_INSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[43:32]);
|
||||
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
|
|
|
@ -120,14 +120,14 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [43:0] perf_ibf_stalls;
|
||||
reg [43:0] perf_scb_stalls;
|
||||
reg [43:0] perf_alu_stalls;
|
||||
reg [43:0] perf_lsu_stalls;
|
||||
reg [43:0] perf_csr_stalls;
|
||||
reg [43:0] perf_gpu_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_alu_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_lsu_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_csr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [43:0] perf_fpu_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_fpu_stalls;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -143,26 +143,26 @@ module VX_issue #(
|
|||
`endif
|
||||
end else begin
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 44'd1;
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + 44'd1;
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 44'd1;
|
||||
perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 44'd1;
|
||||
perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + 44'd1;
|
||||
perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 44'd1;
|
||||
perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 44'd1;
|
||||
perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
|
|
@ -101,7 +101,8 @@ module VX_mem_unit # (
|
|||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.IN_ORDER_DRAM (!(`L2_ENABLE || `L3_ENABLE))
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
|
@ -160,7 +161,8 @@ module VX_mem_unit # (
|
|||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.IN_ORDER_DRAM (!(`L2_ENABLE || `L3_ENABLE))
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
||||
|
@ -319,22 +321,22 @@ end else begin
|
|||
assign perf_memsys_if.smem_bank_stalls = 0;
|
||||
end
|
||||
|
||||
reg [43:0] perf_dram_lat_per_cycle;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dram_lat_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_lat_per_cycle <= 0;
|
||||
end else begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle +
|
||||
44'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
|
||||
`PERF_CTR_BITS'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
|
||||
2'((dram_rsp_if.valid && dram_rsp_if.ready) && !(dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready))));
|
||||
end
|
||||
end
|
||||
|
||||
reg [43:0] perf_dram_reads;
|
||||
reg [43:0] perf_dram_writes;
|
||||
reg [43:0] perf_dram_lat;
|
||||
reg [43:0] perf_dram_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dram_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dram_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dram_lat;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_dram_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -344,13 +346,13 @@ end
|
|||
perf_dram_stalls <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid && dram_req_if.ready && !dram_req_if.rw) begin
|
||||
perf_dram_reads <= perf_dram_reads + 44'd1;
|
||||
perf_dram_reads <= perf_dram_reads + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (dram_req_if.valid && dram_req_if.ready && dram_req_if.rw) begin
|
||||
perf_dram_writes <= perf_dram_writes + 44'd1;
|
||||
perf_dram_writes <= perf_dram_writes + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (dram_req_if.valid && !dram_req_if.ready) begin
|
||||
perf_dram_stalls <= perf_dram_stalls + 44'd1;
|
||||
perf_dram_stalls <= perf_dram_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
|
||||
end
|
||||
|
|
45
hw/rtl/cache/VX_cache.v
vendored
45
hw/rtl/cache/VX_cache.v
vendored
|
@ -49,8 +49,6 @@ module VX_cache #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire flush,
|
||||
|
||||
// Core request
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
|
@ -66,11 +64,6 @@ module VX_cache #(
|
|||
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
// DRAM request
|
||||
output wire dram_req_valid,
|
||||
output wire dram_req_rw,
|
||||
|
@ -84,7 +77,15 @@ module VX_cache #(
|
|||
input wire dram_rsp_valid,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data,
|
||||
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
|
||||
output wire dram_rsp_ready
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
// device flush
|
||||
input wire flush
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
|
||||
|
@ -422,13 +423,13 @@ module VX_cache #(
|
|||
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
|
||||
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
|
||||
|
||||
reg [43:0] perf_core_reads;
|
||||
reg [43:0] perf_core_writes;
|
||||
reg [43:0] perf_read_misses;
|
||||
reg [43:0] perf_write_misses;
|
||||
reg [43:0] perf_mshr_stalls;
|
||||
reg [43:0] perf_pipe_stalls;
|
||||
reg [43:0] perf_crsp_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_pipe_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -440,13 +441,13 @@ module VX_cache #(
|
|||
perf_pipe_stalls <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + 44'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses+ 44'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + 44'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + 44'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses+ `PERF_CTR_BITS'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + `PERF_CTR_BITS'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
|
@ -24,7 +24,7 @@ module VX_cache_core_req_bank_sel #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [43:0] bank_stalls,
|
||||
output wire [`PERF_CTR_BITS-1:0] bank_stalls,
|
||||
`endif
|
||||
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
|
@ -306,13 +306,13 @@ module VX_cache_core_req_bank_sel #(
|
|||
end
|
||||
end
|
||||
|
||||
reg [43:0] bank_stalls_r;
|
||||
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + 44'($countones(core_req_sel_r & ~core_req_ready));
|
||||
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
|
||||
end
|
||||
end
|
||||
|
||||
|
|
12
hw/rtl/cache/VX_shared_mem.v
vendored
12
hw/rtl/cache/VX_shared_mem.v
vendored
|
@ -250,9 +250,9 @@ module VX_shared_mem #(
|
|||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
end
|
||||
|
||||
reg [43:0] perf_core_reads;
|
||||
reg [43:0] perf_core_writes;
|
||||
reg [43:0] perf_crsp_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -260,9 +260,9 @@ module VX_shared_mem #(
|
|||
perf_core_writes <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -5,14 +5,14 @@
|
|||
|
||||
interface VX_perf_cache_if ();
|
||||
|
||||
wire [43:0] reads;
|
||||
wire [43:0] writes;
|
||||
wire [43:0] read_misses;
|
||||
wire [43:0] write_misses;
|
||||
wire [43:0] bank_stalls;
|
||||
wire [43:0] mshr_stalls;
|
||||
wire [43:0] pipe_stalls;
|
||||
wire [43:0] crsp_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] reads;
|
||||
wire [`PERF_CTR_BITS-1:0] writes;
|
||||
wire [`PERF_CTR_BITS-1:0] read_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] write_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] bank_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] mshr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] crsp_stalls;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -5,28 +5,28 @@
|
|||
|
||||
interface VX_perf_memsys_if ();
|
||||
|
||||
wire [43:0] icache_reads;
|
||||
wire [43:0] icache_read_misses;
|
||||
wire [43:0] icache_pipe_stalls;
|
||||
wire [43:0] icache_crsp_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_read_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_crsp_stalls;
|
||||
|
||||
wire [43:0] dcache_reads;
|
||||
wire [43:0] dcache_writes;
|
||||
wire [43:0] dcache_read_misses;
|
||||
wire [43:0] dcache_write_misses;
|
||||
wire [43:0] dcache_bank_stalls;
|
||||
wire [43:0] dcache_mshr_stalls;
|
||||
wire [43:0] dcache_pipe_stalls;
|
||||
wire [43:0] dcache_crsp_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_read_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_write_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_bank_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_mshr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_crsp_stalls;
|
||||
|
||||
wire [43:0] smem_reads;
|
||||
wire [43:0] smem_writes;
|
||||
wire [43:0] smem_bank_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] smem_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] smem_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] smem_bank_stalls;
|
||||
|
||||
wire [43:0] dram_reads;
|
||||
wire [43:0] dram_writes;
|
||||
wire [43:0] dram_stalls;
|
||||
wire [43:0] dram_latency;
|
||||
wire [`PERF_CTR_BITS-1:0] dram_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] dram_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] dram_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dram_latency;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
wire [43:0] ibf_stalls;
|
||||
wire [43:0] scb_stalls;
|
||||
wire [43:0] lsu_stalls;
|
||||
wire [43:0] csr_stalls;
|
||||
wire [43:0] alu_stalls;
|
||||
wire [43:0] gpu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] csr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] alu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [43:0] fpu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
|
||||
`endif
|
||||
endinterface
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue