mirror of
https://github.com/lowRISC/ibex.git
synced 2025-04-22 04:47:25 -04:00
Added taken branch performance counter and excluded jumps and branches
from the icache miss count
This commit is contained in:
parent
a6c4f6d4ef
commit
4ed498014b
5 changed files with 70 additions and 72 deletions
|
@ -106,7 +106,6 @@ module riscv_controller
|
|||
|
||||
// Performance Counters
|
||||
output logic perf_jump_o, // we are executing a jump instruction (j, jr, jal, jalr)
|
||||
output logic perf_branch_o, // we are executing a branch instruction (bf, bnf)
|
||||
output logic perf_jr_stall_o, // stall due to jump-register-hazard
|
||||
output logic perf_ld_stall_o // stall due to load-use-hazard
|
||||
);
|
||||
|
@ -516,7 +515,6 @@ module riscv_controller
|
|||
|
||||
// Performance Counters
|
||||
assign perf_jump_o = (jump_in_id_i == `BRANCH_JAL || jump_in_id_i == `BRANCH_JALR);
|
||||
assign perf_branch_o = (jump_in_id_i == `BRANCH_COND);
|
||||
assign perf_jr_stall_o = jr_stall_o;
|
||||
assign perf_ld_stall_o = load_stall_o;
|
||||
|
||||
|
|
|
@ -78,8 +78,10 @@ module riscv_cs_registers
|
|||
input logic is_decoding_i, // controller is in DECODE state
|
||||
|
||||
input logic imiss_i, // instruction fetch
|
||||
input logic pc_set_i, // pc was set to a new value
|
||||
input logic jump_i, // jump instruction seen (j, jr, jal, jalr)
|
||||
input logic branch_i, // branch instruction seen (bf, bnf)
|
||||
input logic branch_taken_i, // branch was taken
|
||||
input logic ld_stall_i, // load use hazard
|
||||
input logic jr_stall_i, // jump register use hazard
|
||||
|
||||
|
@ -89,7 +91,7 @@ module riscv_cs_registers
|
|||
input logic [N_EXT_CNT-1:0] ext_counters_i
|
||||
);
|
||||
|
||||
localparam N_PERF_COUNTERS = 10 + N_EXT_CNT;
|
||||
localparam N_PERF_COUNTERS = 11 + N_EXT_CNT;
|
||||
|
||||
`ifdef ASIC_SYNTHESIS
|
||||
localparam N_PERF_REGS = 1;
|
||||
|
@ -276,12 +278,13 @@ module riscv_cs_registers
|
|||
assign PCCR_in[1] = id_valid_i & is_decoding_i; // instruction counter
|
||||
assign PCCR_in[2] = ld_stall_i & id_valid_q; // nr of load use hazards
|
||||
assign PCCR_in[3] = jr_stall_i & id_valid_q; // nr of jump register hazards
|
||||
assign PCCR_in[4] = imiss_i; // cycles waiting for instruction fetches
|
||||
assign PCCR_in[4] = imiss_i & (~pc_set_i); // cycles waiting for instruction fetches, excluding jumps and branches
|
||||
assign PCCR_in[5] = mem_load_i; // nr of loads
|
||||
assign PCCR_in[6] = mem_store_i; // nr of stores
|
||||
assign PCCR_in[7] = jump_i & id_valid_q; // nr of jumps (unconditional)
|
||||
assign PCCR_in[8] = branch_i & id_valid_q; // nr of branches (conditional)
|
||||
assign PCCR_in[9] = id_valid_i & is_decoding_i & is_compressed_i; // compressed instruction counter
|
||||
assign PCCR_in[7] = jump_i & id_valid_q; // nr of jumps (unconditional)
|
||||
assign PCCR_in[8] = branch_i & id_valid_q; // nr of branches (conditional)
|
||||
assign PCCR_in[9] = branch_taken_i & id_valid_q; // nr of taken branches (conditional)
|
||||
assign PCCR_in[10] = id_valid_i & is_decoding_i & is_compressed_i; // compressed instruction counter
|
||||
|
||||
// assign external performance counters
|
||||
generate
|
||||
|
|
|
@ -96,20 +96,19 @@ controlled by the \instr{saturation} bit in PCMR.
|
|||
\textbf{PCCR0} & \textbf{CYCLES} & Count the number of cycles the core was running \\ \hline
|
||||
\textbf{PCCR1} & \textbf{INSTR} & Count the number of instructions executed \\ \hline
|
||||
\textbf{PCCR2} & \textbf{LD\_STALL} & Number of load data hazards \\ \hline
|
||||
\textbf{PCCR3} & \textbf{JMP\_STALL} & Number of jump register data hazards \\ \hline
|
||||
\textbf{PCCR3} & \textbf{JR\_STALL} & Number of jump register data hazards \\ \hline
|
||||
\textbf{PCCR4} & \textbf{IMISS} & Cycles waiting for instruction fetches, i.e. the number of instructions wasted due to non-ideal caches. Cycles in which the PC is set to a new value by a jump or branch are excluded \\ \hline
|
||||
\textbf{PCCR5} & \textbf{WBRANCH} & Number of wrong predicted branches \\ \hline
|
||||
\textbf{PCCR6} & \textbf{WBRANCH\_CYC} & Cycles wasted due to wrong predicted branches \\ \hline
|
||||
\textbf{PCCR7} & \textbf{LD} & Number of memory loads executed. Misaligned accesses are counted twice \\ \hline
|
||||
\textbf{PCCR8} & \textbf{ST} & Number of memory stores executed. Misaligned accesses are counted twice \\ \hline
|
||||
\textbf{PCCR9} & \textbf{JUMP} & Number of jumps (j, jal, jr, jalr)\\ \hline
|
||||
\textbf{PCCR10} & \textbf{BRANCH} & Number of branches (bf, bnf), counts taken and not taken branches\\ \hline
|
||||
\textbf{PCCR11} & \textbf{DELAY\_NOP} & Number of empty (l.nop) delay slots \\ \hline
|
||||
\textbf{PCCR12} & \textbf{LD\_EXT} & Number of memory loads to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR13} & \textbf{ST\_EXT} & Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR14} & \textbf{LD\_EXT\_CYC} & Cycles used for memory loads to EXT. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR15} & \textbf{ST\_EXT\_CYC} & Cycles used for memory stores to EXT. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR16} & \textbf{TCDM\_CONT} & Cycles wasted due to TCDM/log-interconnect contention \\ \hline
|
||||
\textbf{PCCR5} & \textbf{LD} & Number of memory loads executed. Misaligned accesses are counted twice \\ \hline
|
||||
\textbf{PCCR6} & \textbf{ST} & Number of memory stores executed. Misaligned accesses are counted twice \\ \hline
|
||||
\textbf{PCCR7} & \textbf{JUMP} & Number of jumps (j, jal, jr, jalr)\\ \hline
|
||||
\textbf{PCCR8} & \textbf{BRANCH} & Number of branches, counts taken and not taken branches\\ \hline
|
||||
\textbf{PCCR9} & \textbf{BTAKEN} & Number of taken branches \\ \hline
|
||||
\textbf{PCCR10} & \textbf{RVC} & Number of compressed instructions executed \\ \hline
|
||||
\textbf{PCCR11} & \textbf{LD\_EXT} & Number of memory loads to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR12} & \textbf{ST\_EXT} & Number of memory stores to EXT executed. Misaligned accesses are counted twice. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR13} & \textbf{LD\_EXT\_CYC} & Cycles used for memory loads to EXT. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR14} & \textbf{ST\_EXT\_CYC} & Cycles used for memory stores to EXT. Every non-TCDM access is considered external \\ \hline
|
||||
\textbf{PCCR15} & \textbf{TCDM\_CONT} & Cycles wasted due to TCDM/log-interconnect contention \\ \hline
|
||||
\textbf{PCCR31} & \textbf{ALL} & Special Register, a write to this register will set all counters to the supplied value\\ \bottomrule
|
||||
\end{tabularx}
|
||||
\end{table}
|
||||
|
|
|
@ -166,7 +166,6 @@ module riscv_id_stage
|
|||
|
||||
// Performance Counters
|
||||
output logic perf_jump_o, // we are executing a jump instruction
|
||||
output logic perf_branch_o, // we are executing a branch instruction
|
||||
output logic perf_jr_stall_o, // jump-register-hazard
|
||||
output logic perf_ld_stall_o // load-use-hazard
|
||||
);
|
||||
|
@ -710,7 +709,6 @@ module riscv_id_stage
|
|||
|
||||
// Performance Counters
|
||||
.perf_jump_o ( perf_jump_o ),
|
||||
.perf_branch_o ( perf_branch_o ),
|
||||
.perf_jr_stall_o ( perf_jr_stall_o ),
|
||||
.perf_ld_stall_o ( perf_ld_stall_o )
|
||||
);
|
||||
|
|
100
riscv_core.sv
100
riscv_core.sv
|
@ -225,7 +225,6 @@ module riscv_core
|
|||
// Performance Counters
|
||||
logic perf_imiss;
|
||||
logic perf_jump;
|
||||
logic perf_branch;
|
||||
logic perf_jr_stall;
|
||||
logic perf_ld_stall;
|
||||
|
||||
|
@ -248,21 +247,21 @@ module riscv_core
|
|||
)
|
||||
if_stage_i
|
||||
(
|
||||
.clk ( clk ),
|
||||
.rst_n ( rst_n ),
|
||||
.clk ( clk ),
|
||||
.rst_n ( rst_n ),
|
||||
|
||||
// boot address (trap vector location)
|
||||
.boot_addr_i ( boot_addr_i ),
|
||||
.boot_addr_i ( boot_addr_i ),
|
||||
|
||||
// instruction request control
|
||||
.req_i ( instr_req_int ),
|
||||
.req_i ( instr_req_int ),
|
||||
|
||||
// instruction cache interface
|
||||
.instr_req_o ( instr_req_o ),
|
||||
.instr_addr_o ( instr_addr_o ),
|
||||
.instr_gnt_i ( instr_gnt_i ),
|
||||
.instr_rvalid_i ( instr_rvalid_i ),
|
||||
.instr_rdata_i ( instr_rdata_i ),
|
||||
.instr_req_o ( instr_req_o ),
|
||||
.instr_addr_o ( instr_addr_o ),
|
||||
.instr_gnt_i ( instr_gnt_i ),
|
||||
.instr_rvalid_i ( instr_rvalid_i ),
|
||||
.instr_rdata_i ( instr_rdata_i ),
|
||||
|
||||
// outputs to ID stage
|
||||
.hwlp_dec_cnt_id_o ( hwlp_dec_cnt_id ),
|
||||
|
@ -283,26 +282,26 @@ module riscv_core
|
|||
.exc_vec_pc_mux_i ( exc_vec_pc_mux_id ),
|
||||
|
||||
// from hwloop registers
|
||||
.hwlp_start_i ( hwlp_start ),
|
||||
.hwlp_end_i ( hwlp_end ),
|
||||
.hwlp_cnt_i ( hwlp_cnt ),
|
||||
.hwlp_start_i ( hwlp_start ),
|
||||
.hwlp_end_i ( hwlp_end ),
|
||||
.hwlp_cnt_i ( hwlp_cnt ),
|
||||
|
||||
// from debug unit
|
||||
.dbg_npc_i ( dbg_npc ),
|
||||
.dbg_set_npc_i ( dbg_set_npc ),
|
||||
.dbg_npc_i ( dbg_npc ),
|
||||
.dbg_set_npc_i ( dbg_set_npc ),
|
||||
|
||||
// Jump targets
|
||||
.jump_target_id_i ( jump_target_id ),
|
||||
.jump_target_ex_i ( jump_target_ex ),
|
||||
.jump_target_id_i ( jump_target_id ),
|
||||
.jump_target_ex_i ( jump_target_ex ),
|
||||
|
||||
// pipeline stalls
|
||||
.halt_if_i ( halt_if ),
|
||||
.if_ready_o ( if_ready ),
|
||||
.id_ready_i ( id_ready ),
|
||||
.if_valid_o ( if_valid ),
|
||||
.halt_if_i ( halt_if ),
|
||||
.if_ready_o ( if_ready ),
|
||||
.id_ready_i ( id_ready ),
|
||||
.if_valid_o ( if_valid ),
|
||||
|
||||
.if_busy_o ( if_busy ),
|
||||
.perf_imiss_o ( perf_imiss )
|
||||
.if_busy_o ( if_busy ),
|
||||
.perf_imiss_o ( perf_imiss )
|
||||
);
|
||||
|
||||
|
||||
|
@ -447,7 +446,6 @@ module riscv_core
|
|||
|
||||
// Performance Counters
|
||||
.perf_jump_o ( perf_jump ),
|
||||
.perf_branch_o ( perf_branch ),
|
||||
.perf_jr_stall_o ( perf_jr_stall ),
|
||||
.perf_ld_stall_o ( perf_ld_stall )
|
||||
);
|
||||
|
@ -587,49 +585,51 @@ module riscv_core
|
|||
)
|
||||
cs_registers_i
|
||||
(
|
||||
.clk ( clk ),
|
||||
.rst_n ( rst_n ),
|
||||
.clk ( clk ),
|
||||
.rst_n ( rst_n ),
|
||||
|
||||
// Core and Cluster ID from outside
|
||||
.core_id_i ( core_id_i ),
|
||||
.cluster_id_i ( cluster_id_i ),
|
||||
.core_id_i ( core_id_i ),
|
||||
.cluster_id_i ( cluster_id_i ),
|
||||
|
||||
// Interface to CSRs (SRAM like)
|
||||
.csr_access_i ( csr_access_ex ),
|
||||
.csr_addr_i ( csr_addr ),
|
||||
.csr_wdata_i ( csr_wdata ),
|
||||
.csr_op_i ( csr_op ),
|
||||
.csr_rdata_o ( csr_rdata ),
|
||||
.csr_access_i ( csr_access_ex ),
|
||||
.csr_addr_i ( csr_addr ),
|
||||
.csr_wdata_i ( csr_wdata ),
|
||||
.csr_op_i ( csr_op ),
|
||||
.csr_rdata_o ( csr_rdata ),
|
||||
|
||||
// Interrupt related control signals
|
||||
.irq_enable_o ( irq_enable ),
|
||||
.epcr_o ( epcr ),
|
||||
.irq_enable_o ( irq_enable ),
|
||||
.epcr_o ( epcr ),
|
||||
|
||||
.curr_pc_id_i ( current_pc_id ), // from IF stage
|
||||
.save_pc_id_i ( save_pc_id ),
|
||||
.curr_pc_id_i ( current_pc_id ), // from IF stage
|
||||
.save_pc_id_i ( save_pc_id ),
|
||||
|
||||
.exc_cause_i ( exc_cause ),
|
||||
.save_exc_cause_i ( save_exc_cause ),
|
||||
.exc_cause_i ( exc_cause ),
|
||||
.save_exc_cause_i ( save_exc_cause ),
|
||||
|
||||
// from hwloop registers
|
||||
.hwlp_start_i ( hwlp_start ),
|
||||
.hwlp_end_i ( hwlp_end ),
|
||||
.hwlp_cnt_i ( hwlp_cnt ),
|
||||
.hwlp_start_i ( hwlp_start ),
|
||||
.hwlp_end_i ( hwlp_end ),
|
||||
.hwlp_cnt_i ( hwlp_cnt ),
|
||||
|
||||
.hwlp_regid_o ( csr_hwlp_regid ),
|
||||
.hwlp_we_o ( csr_hwlp_we ),
|
||||
.hwlp_data_o ( csr_hwlp_data ),
|
||||
.hwlp_regid_o ( csr_hwlp_regid ),
|
||||
.hwlp_we_o ( csr_hwlp_we ),
|
||||
.hwlp_data_o ( csr_hwlp_data ),
|
||||
|
||||
// performance counter related signals
|
||||
.id_valid_i ( id_valid ),
|
||||
.is_compressed_i ( is_compressed_id ),
|
||||
.is_decoding_i ( is_decoding ),
|
||||
|
||||
.imiss_i ( perf_imiss ),
|
||||
.jump_i ( perf_jump ),
|
||||
.branch_i ( perf_branch ),
|
||||
.ld_stall_i ( perf_ld_stall ),
|
||||
.jr_stall_i ( perf_jr_stall ),
|
||||
.imiss_i ( perf_imiss ),
|
||||
.pc_set_i ( pc_set ),
|
||||
.jump_i ( perf_jump ),
|
||||
.branch_i ( branch_in_ex ),
|
||||
.branch_taken_i ( branch_decision ),
|
||||
.ld_stall_i ( perf_ld_stall ),
|
||||
.jr_stall_i ( perf_jr_stall ),
|
||||
|
||||
.mem_load_i ( data_req_o & data_gnt_i & (~data_we_o) ),
|
||||
.mem_store_i ( data_req_o & data_gnt_i & data_we_o ),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue