[rtl] Fix performance counter bug

- Make sure performance counters only count retired, non-trapping
  instructions excluding ebrk/ecall
- Rewire some signalling through the writeback stage to allow
  instruction retire to be signalled from one place
- Relates to #1132

Signed-off-by: Tom Roberts <tomroberts@lowrisc.org>
This commit is contained in:
Tom Roberts 2020-10-20 14:52:16 +01:00 committed by Tom Roberts
parent 25fde8fcb3
commit 62405f931f
3 changed files with 130 additions and 102 deletions

View file

@ -131,6 +131,7 @@ module ibex_core #(
// ease fan-out)
logic [15:0] instr_rdata_c_id; // Compressed instruction sampled inside IF stage
logic instr_is_compressed_id;
logic instr_perf_count_id;
logic instr_bp_taken_id;
logic instr_fetch_err; // Bus error on instr fetch
logic instr_fetch_err_plus2; // Instruction error is misaligned
@ -246,6 +247,7 @@ module ibex_core #(
logic ex_valid;
logic lsu_resp_valid;
logic lsu_resp_err;
// Signals between instruction core interface and pipe (if and id stages)
logic instr_req_int; // Id stage asserts a req to instruction core interface
@ -298,9 +300,10 @@ module ibex_core #(
// signals relating to instruction movements between pipeline stages
// used by performance counters and RVFI
logic instr_id_done;
logic instr_id_done_compressed;
logic instr_done_wb;
logic perf_instr_ret_wb;
logic perf_instr_ret_compressed_wb;
logic perf_iside_wait;
logic perf_dside_wait;
logic perf_mul_wait;
@ -616,6 +619,7 @@ module ibex_core #(
.en_wb_o ( en_wb ),
.instr_type_wb_o ( instr_type_wb ),
.instr_perf_count_id_o ( instr_perf_count_id ),
.ready_wb_i ( ready_wb ),
.outstanding_load_wb_i ( outstanding_load_wb ),
.outstanding_store_wb_i ( outstanding_store_wb ),
@ -627,8 +631,7 @@ module ibex_core #(
.perf_dside_wait_o ( perf_dside_wait ),
.perf_mul_wait_o ( perf_mul_wait ),
.perf_div_wait_o ( perf_div_wait ),
.instr_id_done_o ( instr_id_done ),
.instr_id_done_compressed_o ( instr_id_done_compressed )
.instr_id_done_o ( instr_id_done )
);
// for RVFI only
@ -683,7 +686,8 @@ module ibex_core #(
// Load/store unit //
/////////////////////
assign data_req_o = data_req_out & ~pmp_req_err[PMP_D];
assign data_req_o = data_req_out & ~pmp_req_err[PMP_D];
assign lsu_resp_err = lsu_load_err | lsu_store_err;
ibex_load_store_unit load_store_unit_i (
.clk_i ( clk ),
@ -734,34 +738,39 @@ module ibex_core #(
ibex_wb_stage #(
.WritebackStage ( WritebackStage )
) wb_stage_i (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.en_wb_i ( en_wb ),
.instr_type_wb_i ( instr_type_wb ),
.pc_id_i ( pc_id ),
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.en_wb_i ( en_wb ),
.instr_type_wb_i ( instr_type_wb ),
.pc_id_i ( pc_id ),
.instr_is_compressed_id_i ( instr_is_compressed_id ),
.instr_perf_count_id_i ( instr_perf_count_id ),
.ready_wb_o ( ready_wb ),
.rf_write_wb_o ( rf_write_wb ),
.outstanding_load_wb_o ( outstanding_load_wb ),
.outstanding_store_wb_o ( outstanding_store_wb ),
.pc_wb_o ( pc_wb ),
.ready_wb_o ( ready_wb ),
.rf_write_wb_o ( rf_write_wb ),
.outstanding_load_wb_o ( outstanding_load_wb ),
.outstanding_store_wb_o ( outstanding_store_wb ),
.pc_wb_o ( pc_wb ),
.perf_instr_ret_wb_o ( perf_instr_ret_wb ),
.perf_instr_ret_compressed_wb_o ( perf_instr_ret_compressed_wb ),
.rf_waddr_id_i ( rf_waddr_id ),
.rf_wdata_id_i ( rf_wdata_id ),
.rf_we_id_i ( rf_we_id ),
.rf_waddr_id_i ( rf_waddr_id ),
.rf_wdata_id_i ( rf_wdata_id ),
.rf_we_id_i ( rf_we_id ),
.rf_wdata_lsu_i ( rf_wdata_lsu ),
.rf_we_lsu_i ( rf_we_lsu ),
.rf_wdata_lsu_i ( rf_wdata_lsu ),
.rf_we_lsu_i ( rf_we_lsu ),
.rf_wdata_fwd_wb_o ( rf_wdata_fwd_wb ),
.rf_wdata_fwd_wb_o ( rf_wdata_fwd_wb ),
.rf_waddr_wb_o ( rf_waddr_wb ),
.rf_wdata_wb_o ( rf_wdata_wb ),
.rf_we_wb_o ( rf_we_wb ),
.rf_waddr_wb_o ( rf_waddr_wb ),
.rf_wdata_wb_o ( rf_wdata_wb ),
.rf_we_wb_o ( rf_we_wb ),
.lsu_resp_valid_i ( lsu_resp_valid ),
.lsu_resp_valid_i ( lsu_resp_valid ),
.lsu_resp_err_i ( lsu_resp_err ),
.instr_done_wb_o ( instr_done_wb )
.instr_done_wb_o ( instr_done_wb )
);
///////////////////////
@ -970,88 +979,88 @@ module ibex_core #(
.RV32E ( RV32E ),
.RV32M ( RV32M )
) cs_registers_i (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.clk_i ( clk ),
.rst_ni ( rst_ni ),
// Hart ID from outside
.hart_id_i ( hart_id_i ),
.priv_mode_id_o ( priv_mode_id ),
.priv_mode_if_o ( priv_mode_if ),
.priv_mode_lsu_o ( priv_mode_lsu ),
.hart_id_i ( hart_id_i ),
.priv_mode_id_o ( priv_mode_id ),
.priv_mode_if_o ( priv_mode_if ),
.priv_mode_lsu_o ( priv_mode_lsu ),
// mtvec
.csr_mtvec_o ( csr_mtvec ),
.csr_mtvec_init_i ( csr_mtvec_init ),
.boot_addr_i ( boot_addr_i ),
.csr_mtvec_o ( csr_mtvec ),
.csr_mtvec_init_i ( csr_mtvec_init ),
.boot_addr_i ( boot_addr_i ),
// Interface to CSRs ( SRAM like )
.csr_access_i ( csr_access ),
.csr_addr_i ( csr_addr ),
.csr_wdata_i ( csr_wdata ),
.csr_op_i ( csr_op ),
.csr_op_en_i ( csr_op_en ),
.csr_rdata_o ( csr_rdata ),
// Interface to CSRs ( SRAM like )
.csr_access_i ( csr_access ),
.csr_addr_i ( csr_addr ),
.csr_wdata_i ( csr_wdata ),
.csr_op_i ( csr_op ),
.csr_op_en_i ( csr_op_en ),
.csr_rdata_o ( csr_rdata ),
// Interrupt related control signals
.irq_software_i ( irq_software_i ),
.irq_timer_i ( irq_timer_i ),
.irq_external_i ( irq_external_i ),
.irq_fast_i ( irq_fast_i ),
.nmi_mode_i ( nmi_mode ),
.irq_pending_o ( irq_pending ),
.irqs_o ( irqs ),
.csr_mstatus_mie_o ( csr_mstatus_mie ),
.csr_mstatus_tw_o ( csr_mstatus_tw ),
.csr_mepc_o ( csr_mepc ),
.irq_software_i ( irq_software_i ),
.irq_timer_i ( irq_timer_i ),
.irq_external_i ( irq_external_i ),
.irq_fast_i ( irq_fast_i ),
.nmi_mode_i ( nmi_mode ),
.irq_pending_o ( irq_pending ),
.irqs_o ( irqs ),
.csr_mstatus_mie_o ( csr_mstatus_mie ),
.csr_mstatus_tw_o ( csr_mstatus_tw ),
.csr_mepc_o ( csr_mepc ),
// PMP
.csr_pmp_cfg_o ( csr_pmp_cfg ),
.csr_pmp_addr_o ( csr_pmp_addr ),
.csr_pmp_cfg_o ( csr_pmp_cfg ),
.csr_pmp_addr_o ( csr_pmp_addr ),
// debug
.csr_depc_o ( csr_depc ),
.debug_mode_i ( debug_mode ),
.debug_cause_i ( debug_cause ),
.debug_csr_save_i ( debug_csr_save ),
.debug_single_step_o ( debug_single_step ),
.debug_ebreakm_o ( debug_ebreakm ),
.debug_ebreaku_o ( debug_ebreaku ),
.trigger_match_o ( trigger_match ),
.csr_depc_o ( csr_depc ),
.debug_mode_i ( debug_mode ),
.debug_cause_i ( debug_cause ),
.debug_csr_save_i ( debug_csr_save ),
.debug_single_step_o ( debug_single_step ),
.debug_ebreakm_o ( debug_ebreakm ),
.debug_ebreaku_o ( debug_ebreaku ),
.trigger_match_o ( trigger_match ),
.pc_if_i ( pc_if ),
.pc_id_i ( pc_id ),
.pc_wb_i ( pc_wb ),
.pc_if_i ( pc_if ),
.pc_id_i ( pc_id ),
.pc_wb_i ( pc_wb ),
.data_ind_timing_o ( data_ind_timing ),
.dummy_instr_en_o ( dummy_instr_en ),
.dummy_instr_mask_o ( dummy_instr_mask ),
.dummy_instr_seed_en_o ( dummy_instr_seed_en ),
.dummy_instr_seed_o ( dummy_instr_seed ),
.icache_enable_o ( icache_enable ),
.csr_shadow_err_o ( csr_shadow_err ),
.data_ind_timing_o ( data_ind_timing ),
.dummy_instr_en_o ( dummy_instr_en ),
.dummy_instr_mask_o ( dummy_instr_mask ),
.dummy_instr_seed_en_o ( dummy_instr_seed_en ),
.dummy_instr_seed_o ( dummy_instr_seed ),
.icache_enable_o ( icache_enable ),
.csr_shadow_err_o ( csr_shadow_err ),
.csr_save_if_i ( csr_save_if ),
.csr_save_id_i ( csr_save_id ),
.csr_save_wb_i ( csr_save_wb ),
.csr_restore_mret_i ( csr_restore_mret_id ),
.csr_restore_dret_i ( csr_restore_dret_id ),
.csr_save_cause_i ( csr_save_cause ),
.csr_mcause_i ( exc_cause ),
.csr_mtval_i ( csr_mtval ),
.illegal_csr_insn_o ( illegal_csr_insn_id ),
.csr_save_if_i ( csr_save_if ),
.csr_save_id_i ( csr_save_id ),
.csr_save_wb_i ( csr_save_wb ),
.csr_restore_mret_i ( csr_restore_mret_id ),
.csr_restore_dret_i ( csr_restore_dret_id ),
.csr_save_cause_i ( csr_save_cause ),
.csr_mcause_i ( exc_cause ),
.csr_mtval_i ( csr_mtval ),
.illegal_csr_insn_o ( illegal_csr_insn_id ),
// performance counter related signals
.instr_ret_i ( instr_id_done ),
.instr_ret_compressed_i ( instr_id_done_compressed ),
.iside_wait_i ( perf_iside_wait ),
.jump_i ( perf_jump ),
.branch_i ( perf_branch ),
.branch_taken_i ( perf_tbranch ),
.mem_load_i ( perf_load ),
.mem_store_i ( perf_store ),
.dside_wait_i ( perf_dside_wait ),
.mul_wait_i ( perf_mul_wait ),
.div_wait_i ( perf_div_wait )
.instr_ret_i ( perf_instr_ret_wb ),
.instr_ret_compressed_i ( perf_instr_ret_compressed_wb ),
.iside_wait_i ( perf_iside_wait ),
.jump_i ( perf_jump ),
.branch_i ( perf_branch ),
.branch_taken_i ( perf_tbranch ),
.mem_load_i ( perf_load ),
.mem_store_i ( perf_store ),
.dside_wait_i ( perf_dside_wait ),
.mul_wait_i ( perf_mul_wait ),
.div_wait_i ( perf_div_wait )
);
// These assertions are in top-level as instr_valid_id required as the enable term

View file

@ -167,6 +167,7 @@ module ibex_id_stage #(
output logic en_wb_o,
output ibex_pkg::wb_instr_type_e instr_type_wb_o,
output logic instr_perf_count_id_o,
input logic ready_wb_i,
input logic outstanding_load_wb_i,
input logic outstanding_store_wb_i,
@ -179,8 +180,7 @@ module ibex_id_stage #(
// access to finish before proceeding
output logic perf_mul_wait_o,
output logic perf_div_wait_o,
output logic instr_id_done_o,
output logic instr_id_done_compressed_o
output logic instr_id_done_o
);
import ibex_pkg::*;
@ -917,8 +917,6 @@ module ibex_id_stage #(
lsu_we ? WB_INSTR_STORE :
WB_INSTR_LOAD;
assign en_wb_o = instr_done;
assign instr_id_done_o = en_wb_o & ready_wb_i;
// Stall ID/EX as instruction in ID/EX cannot proceed to writeback yet
@ -977,15 +975,21 @@ module ibex_id_stage #(
assign perf_dside_wait_o = instr_executing & lsu_req_dec & ~lsu_resp_valid_i;
assign en_wb_o = 1'b0;
assign instr_id_done_o = instr_done;
end
// Signal which instructions to count as retired in minstret, all traps along with ebrk and
// ecall instructions are not counted.
assign instr_perf_count_id_o = ~ebrk_insn & ~ecall_insn_dec & ~illegal_insn_dec &
~illegal_csr_insn_i & ~instr_fetch_err_i;
// An instruction is ready to move to the writeback stage (or retire if there is no writeback
// stage)
assign en_wb_o = instr_done;
assign perf_mul_wait_o = stall_multdiv & mult_en_dec;
assign perf_div_wait_o = stall_multdiv & div_en_dec;
assign instr_id_done_compressed_o = instr_id_done_o & instr_is_compressed_i;
////////////////
// Assertions //
////////////////

View file

@ -22,12 +22,16 @@ module ibex_wb_stage #(
input logic en_wb_i,
input ibex_pkg::wb_instr_type_e instr_type_wb_i,
input logic [31:0] pc_id_i,
input logic instr_is_compressed_id_i,
input logic instr_perf_count_id_i,
output logic ready_wb_o,
output logic rf_write_wb_o,
output logic outstanding_load_wb_o,
output logic outstanding_store_wb_o,
output logic [31:0] pc_wb_o,
output logic perf_instr_ret_wb_o,
output logic perf_instr_ret_compressed_wb_o,
input logic [4:0] rf_waddr_id_i,
input logic [31:0] rf_wdata_id_i,
@ -43,6 +47,7 @@ module ibex_wb_stage #(
output logic rf_we_wb_o,
input logic lsu_resp_valid_i,
input logic lsu_resp_err_i,
output logic instr_done_wb_o
);
@ -63,6 +68,8 @@ module ibex_wb_stage #(
logic wb_valid_q;
logic [31:0] wb_pc_q;
logic wb_compressed_q;
logic wb_count_q;
wb_instr_type_e wb_instr_type_q;
logic wb_valid_d;
@ -91,6 +98,8 @@ module ibex_wb_stage #(
rf_wdata_wb_q <= rf_wdata_id_i;
wb_instr_type_q <= instr_type_wb_i;
wb_pc_q <= pc_id_i;
wb_compressed_q <= instr_is_compressed_id_i;
wb_count_q <= instr_perf_count_id_i;
end
end
@ -111,6 +120,11 @@ module ibex_wb_stage #(
assign instr_done_wb_o = wb_valid_q & wb_done;
// Increment instruction retire counters for valid instructions which are not lsu errors
assign perf_instr_ret_wb_o = instr_done_wb_o & wb_count_q &
~(lsu_resp_valid_i & lsu_resp_err_i);
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
// Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
// rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
// that returns too late to be used on the forwarding path.
@ -121,6 +135,11 @@ module ibex_wb_stage #(
assign rf_wdata_wb_mux[0] = rf_wdata_id_i;
assign rf_wdata_wb_mux_we[0] = rf_we_id_i;
// Increment instruction retire counters for valid instructions which are not lsu errors
assign perf_instr_ret_wb_o = instr_perf_count_id_i & en_wb_i &
~(lsu_resp_valid_i & lsu_resp_err_i);
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
// ready needs to be constant 1 without writeback stage (otherwise ID/EX stage will stall)
assign ready_wb_o = 1'b1;
@ -129,17 +148,13 @@ module ibex_wb_stage #(
// Tie-off outputs to constant values
logic unused_clk;
logic unused_rst;
logic unused_en_wb;
wb_instr_type_e unused_instr_type_wb;
logic [31:0] unused_pc_id;
logic unused_lsu_resp_valid;
assign unused_clk = clk_i;
assign unused_rst = rst_ni;
assign unused_en_wb = en_wb_i;
assign unused_instr_type_wb = instr_type_wb_i;
assign unused_pc_id = pc_id_i;
assign unused_lsu_resp_valid = lsu_resp_valid_i;
assign outstanding_load_wb_o = 1'b0;
assign outstanding_store_wb_o = 1'b0;