mirror of
https://github.com/openhwgroup/cve2.git
synced 2025-04-24 14:09:08 -04:00
[rtl] Fix retired instruction counters
When the writeback stage is present the retired instruction counter (minstret) and the retired compressed instruction counter could see an off by one error when an instruction was in the writeback stage when reading the counters. With this fix the ID stage observes the incremented value of the counters when an instruction that would increment them is in writeback.
This commit is contained in:
parent
75c030b776
commit
31b2f6c863
4 changed files with 134 additions and 58 deletions
|
@ -330,6 +330,8 @@ module ibex_core import ibex_pkg::*; #(
|
|||
|
||||
logic perf_instr_ret_wb;
|
||||
logic perf_instr_ret_compressed_wb;
|
||||
logic perf_instr_ret_wb_spec;
|
||||
logic perf_instr_ret_compressed_wb_spec;
|
||||
logic perf_iside_wait;
|
||||
logic perf_dside_wait;
|
||||
logic perf_mul_wait;
|
||||
|
@ -759,13 +761,15 @@ module ibex_core import ibex_pkg::*; #(
|
|||
.instr_is_compressed_id_i(instr_is_compressed_id),
|
||||
.instr_perf_count_id_i (instr_perf_count_id),
|
||||
|
||||
.ready_wb_o (ready_wb),
|
||||
.rf_write_wb_o (rf_write_wb),
|
||||
.outstanding_load_wb_o (outstanding_load_wb),
|
||||
.outstanding_store_wb_o (outstanding_store_wb),
|
||||
.pc_wb_o (pc_wb),
|
||||
.perf_instr_ret_wb_o (perf_instr_ret_wb),
|
||||
.perf_instr_ret_compressed_wb_o(perf_instr_ret_compressed_wb),
|
||||
.ready_wb_o (ready_wb),
|
||||
.rf_write_wb_o (rf_write_wb),
|
||||
.outstanding_load_wb_o (outstanding_load_wb),
|
||||
.outstanding_store_wb_o (outstanding_store_wb),
|
||||
.pc_wb_o (pc_wb),
|
||||
.perf_instr_ret_wb_o (perf_instr_ret_wb),
|
||||
.perf_instr_ret_compressed_wb_o (perf_instr_ret_compressed_wb),
|
||||
.perf_instr_ret_wb_spec_o (perf_instr_ret_wb_spec),
|
||||
.perf_instr_ret_compressed_wb_spec_o(perf_instr_ret_compressed_wb_spec),
|
||||
|
||||
.rf_waddr_id_i(rf_waddr_id),
|
||||
.rf_wdata_id_i(rf_wdata_id),
|
||||
|
@ -1010,17 +1014,19 @@ module ibex_core import ibex_pkg::*; #(
|
|||
.illegal_csr_insn_o(illegal_csr_insn_id),
|
||||
|
||||
// performance counter related signals
|
||||
.instr_ret_i (perf_instr_ret_wb),
|
||||
.instr_ret_compressed_i(perf_instr_ret_compressed_wb),
|
||||
.iside_wait_i (perf_iside_wait),
|
||||
.jump_i (perf_jump),
|
||||
.branch_i (perf_branch),
|
||||
.branch_taken_i (perf_tbranch),
|
||||
.mem_load_i (perf_load),
|
||||
.mem_store_i (perf_store),
|
||||
.dside_wait_i (perf_dside_wait),
|
||||
.mul_wait_i (perf_mul_wait),
|
||||
.div_wait_i (perf_div_wait)
|
||||
.instr_ret_i (perf_instr_ret_wb),
|
||||
.instr_ret_compressed_i (perf_instr_ret_compressed_wb),
|
||||
.instr_ret_spec_i (perf_instr_ret_wb_spec),
|
||||
.instr_ret_compressed_spec_i(perf_instr_ret_compressed_wb_spec),
|
||||
.iside_wait_i (perf_iside_wait),
|
||||
.jump_i (perf_jump),
|
||||
.branch_i (perf_branch),
|
||||
.branch_taken_i (perf_tbranch),
|
||||
.mem_load_i (perf_load),
|
||||
.mem_store_i (perf_store),
|
||||
.dside_wait_i (perf_dside_wait),
|
||||
.mul_wait_i (perf_mul_wait),
|
||||
.div_wait_i (perf_div_wait)
|
||||
);
|
||||
|
||||
// These assertions are in top-level as instr_valid_id required as the enable term
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
module ibex_counter #(
|
||||
parameter int CounterWidth = 32
|
||||
parameter int CounterWidth = 32,
|
||||
// When set `counter_val_upd_o` provides an incremented version of the counter value, otherwise
|
||||
// the output is hard-wired to 0. This is required to allow Xilinx DSP inference to work
|
||||
// correctly. When `ProvideValUpd` is set no DSPs are inferred.
|
||||
parameter bit ProvideValUpd = 0
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
@ -8,7 +12,8 @@ module ibex_counter #(
|
|||
input logic counterh_we_i,
|
||||
input logic counter_we_i,
|
||||
input logic [31:0] counter_val_i,
|
||||
output logic [63:0] counter_val_o
|
||||
output logic [63:0] counter_val_o,
|
||||
output logic [63:0] counter_val_upd_o
|
||||
);
|
||||
|
||||
logic [63:0] counter;
|
||||
|
@ -17,9 +22,11 @@ module ibex_counter #(
|
|||
logic we;
|
||||
logic [CounterWidth-1:0] counter_d;
|
||||
|
||||
// Increment
|
||||
assign counter_upd = counter[CounterWidth-1:0] + {{CounterWidth - 1{1'b0}}, 1'b1};
|
||||
|
||||
// Update
|
||||
always_comb begin
|
||||
|
||||
// Write
|
||||
we = counter_we_i | counterh_we_i;
|
||||
counter_load[63:32] = counter[63:32];
|
||||
|
@ -29,9 +36,6 @@ module ibex_counter #(
|
|||
counter_load[31:0] = counter[31:0];
|
||||
end
|
||||
|
||||
// Increment
|
||||
counter_upd = counter[CounterWidth-1:0] + {{CounterWidth - 1{1'b0}}, 1'b1};
|
||||
|
||||
// Next value logic
|
||||
if (we) begin
|
||||
counter_d = counter_load[CounterWidth-1:0];
|
||||
|
@ -67,11 +71,24 @@ module ibex_counter #(
|
|||
if (CounterWidth < 64) begin : g_counter_narrow
|
||||
logic [63:CounterWidth] unused_counter_load;
|
||||
|
||||
assign counter[CounterWidth-1:0] = counter_q;
|
||||
assign counter[63:CounterWidth] = '0;
|
||||
assign unused_counter_load = counter_load[63:CounterWidth];
|
||||
assign counter[CounterWidth-1:0] = counter_q;
|
||||
assign counter[63:CounterWidth] = '0;
|
||||
|
||||
if (ProvideValUpd) begin : g_counter_val_upd_o
|
||||
assign counter_val_upd_o[CounterWidth-1:0] = counter_upd;
|
||||
end else begin : g_no_counter_val_upd_o
|
||||
assign counter_val_upd_o[CounterWidth-1:0] = '0;
|
||||
end
|
||||
assign counter_val_upd_o[63:CounterWidth] = '0;
|
||||
assign unused_counter_load = counter_load[63:CounterWidth];
|
||||
end else begin : g_counter_full
|
||||
assign counter = counter_q;
|
||||
assign counter = counter_q;
|
||||
|
||||
if (ProvideValUpd) begin : g_counter_val_upd_o
|
||||
assign counter_val_upd_o = counter_upd;
|
||||
end else begin : g_no_counter_val_upd_o
|
||||
assign counter_val_upd_o = '0;
|
||||
end
|
||||
end
|
||||
|
||||
assign counter_val_o = counter;
|
||||
|
|
|
@ -106,17 +106,19 @@ module ibex_cs_registers #(
|
|||
// with wrong priviledge level, or
|
||||
// missing write permissions
|
||||
// Performance Counters
|
||||
input logic instr_ret_i, // instr retired in ID/EX stage
|
||||
input logic instr_ret_compressed_i, // compressed instr retired
|
||||
input logic iside_wait_i, // core waiting for the iside
|
||||
input logic jump_i, // jump instr seen (j, jr, jal, jalr)
|
||||
input logic branch_i, // branch instr seen (bf, bnf)
|
||||
input logic branch_taken_i, // branch was taken
|
||||
input logic mem_load_i, // load from memory in this cycle
|
||||
input logic mem_store_i, // store to memory in this cycle
|
||||
input logic dside_wait_i, // core waiting for the dside
|
||||
input logic mul_wait_i, // core waiting for multiply
|
||||
input logic div_wait_i // core waiting for divide
|
||||
input logic instr_ret_i, // instr retired in ID/EX stage
|
||||
input logic instr_ret_compressed_i, // compressed instr retired
|
||||
input logic instr_ret_spec_i, // speculative instr_ret_i
|
||||
input logic instr_ret_compressed_spec_i, // speculative instr_ret_compressed_i
|
||||
input logic iside_wait_i, // core waiting for the iside
|
||||
input logic jump_i, // jump instr seen (j, jr, jal, jalr)
|
||||
input logic branch_i, // branch instr seen (bf, bnf)
|
||||
input logic branch_taken_i, // branch was taken
|
||||
input logic mem_load_i, // load from memory in this cycle
|
||||
input logic mem_store_i, // store to memory in this cycle
|
||||
input logic dside_wait_i, // core waiting for the dside
|
||||
input logic mul_wait_i, // core waiting for multiply
|
||||
input logic div_wait_i // core waiting for divide
|
||||
);
|
||||
|
||||
import ibex_pkg::*;
|
||||
|
@ -242,6 +244,8 @@ module ibex_cs_registers #(
|
|||
logic unused_mhpmcounterh_we_1;
|
||||
logic unused_mhpmcounter_incr_1;
|
||||
|
||||
logic [63:0] minstret_next, minstret_raw;
|
||||
|
||||
// Debug / trigger registers
|
||||
logic [31:0] tselect_rdata;
|
||||
logic [31:0] tmatch_control_rdata;
|
||||
|
@ -1228,12 +1232,15 @@ module ibex_cs_registers #(
|
|||
.counterh_we_i(mhpmcounterh_we[0]),
|
||||
.counter_we_i(mhpmcounter_we[0]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[0])
|
||||
.counter_val_o(mhpmcounter[0]),
|
||||
.counter_val_upd_o()
|
||||
);
|
||||
|
||||
|
||||
// minstret
|
||||
ibex_counter #(
|
||||
.CounterWidth(64)
|
||||
.CounterWidth(64),
|
||||
.ProvideValUpd(1)
|
||||
) minstret_counter_i (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
|
@ -1241,30 +1248,65 @@ module ibex_cs_registers #(
|
|||
.counterh_we_i(mhpmcounterh_we[2]),
|
||||
.counter_we_i(mhpmcounter_we[2]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[2])
|
||||
.counter_val_o(minstret_raw),
|
||||
.counter_val_upd_o(minstret_next)
|
||||
);
|
||||
|
||||
// Where the writeback stage is present instruction in ID observing value of minstret must take
|
||||
// into account any instruction in the writeback stage. If one is present the incremented value of
|
||||
// minstret is used. A speculative version of the signal is used to aid timing. When the writeback
|
||||
// stage sees an exception (so the speculative signal is incorrect) the ID stage will be flushed
|
||||
// so the incorrect value doesn't matter. A similar behaviour is required for the compressed
|
||||
// instruction retired counter below. When the writeback stage isn't present the speculative
|
||||
// signals are always 0.
|
||||
assign mhpmcounter[2] = instr_ret_spec_i & ~mcountinhibit[2] ? minstret_next : minstret_raw;
|
||||
|
||||
// reserved:
|
||||
assign mhpmcounter[1] = '0;
|
||||
assign unused_mhpmcounter_we_1 = mhpmcounter_we[1];
|
||||
assign unused_mhpmcounterh_we_1 = mhpmcounterh_we[1];
|
||||
assign unused_mhpmcounter_incr_1 = mhpmcounter_incr[1];
|
||||
|
||||
for (genvar cnt = 0; cnt < 29; cnt++) begin : gen_cntrs
|
||||
if (cnt < MHPMCounterNum) begin : gen_imp
|
||||
// Iterate through optionally included counters (MHPMCounterNum controls how many are included)
|
||||
for (genvar i = 0; i < 29; i++) begin : gen_cntrs
|
||||
localparam int Cnt = i + 3;
|
||||
|
||||
if (i < MHPMCounterNum) begin : gen_imp
|
||||
logic [63:0] mhpmcounter_raw, mhpmcounter_next;
|
||||
|
||||
ibex_counter #(
|
||||
.CounterWidth(MHPMCounterWidth)
|
||||
.CounterWidth(MHPMCounterWidth),
|
||||
.ProvideValUpd(Cnt == 10)
|
||||
) mcounters_variable_i (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.counter_inc_i(mhpmcounter_incr[cnt+3] & ~mcountinhibit[cnt+3]),
|
||||
.counterh_we_i(mhpmcounterh_we[cnt+3]),
|
||||
.counter_we_i(mhpmcounter_we[cnt+3]),
|
||||
.counter_inc_i(mhpmcounter_incr[Cnt] & ~mcountinhibit[Cnt]),
|
||||
.counterh_we_i(mhpmcounterh_we[Cnt]),
|
||||
.counter_we_i(mhpmcounter_we[Cnt]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[cnt+3])
|
||||
.counter_val_o(mhpmcounter_raw),
|
||||
.counter_val_upd_o(mhpmcounter_next)
|
||||
);
|
||||
|
||||
if (Cnt == 10) begin : gen_compressed_instr_cnt
|
||||
// Special behaviour for reading compressed instruction retired counter, see comment on
|
||||
// `mhpmcounter[2]` above for further information.
|
||||
assign mhpmcounter[Cnt] =
|
||||
instr_ret_compressed_spec_i & ~mcountinhibit[Cnt] ? mhpmcounter_next:
|
||||
mhpmcounter_raw;
|
||||
end else begin : gen_other_cnts
|
||||
logic [63:0] unused_mhpmcounter_next;
|
||||
// All other counters just see the raw counter value directly.
|
||||
assign mhpmcounter[Cnt] = mhpmcounter_raw;
|
||||
assign unused_mhpmcounter_next = mhpmcounter_next;
|
||||
end
|
||||
end else begin : gen_unimp
|
||||
assign mhpmcounter[cnt+3] = '0;
|
||||
assign mhpmcounter[Cnt] = '0;
|
||||
|
||||
if (Cnt == 10) begin : gen_no_compressed_instr_cnt
|
||||
logic unused_instr_ret_compressed_spec_i;
|
||||
assign unused_instr_ret_compressed_spec_i = instr_ret_compressed_spec_i;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -34,6 +34,8 @@ module ibex_wb_stage #(
|
|||
output logic [31:0] pc_wb_o,
|
||||
output logic perf_instr_ret_wb_o,
|
||||
output logic perf_instr_ret_compressed_wb_o,
|
||||
output logic perf_instr_ret_wb_spec_o,
|
||||
output logic perf_instr_ret_compressed_wb_spec_o,
|
||||
|
||||
input logic [4:0] rf_waddr_id_i,
|
||||
input logic [31:0] rf_wdata_id_i,
|
||||
|
@ -144,10 +146,15 @@ module ibex_wb_stage #(
|
|||
|
||||
assign instr_done_wb_o = wb_valid_q & wb_done;
|
||||
|
||||
// Increment instruction retire counters for valid instructions which are not lsu errors
|
||||
assign perf_instr_ret_wb_o = instr_done_wb_o & wb_count_q &
|
||||
~(lsu_resp_valid_i & lsu_resp_err_i);
|
||||
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
|
||||
// Increment instruction retire counters for valid instructions which are not lsu errors.
|
||||
// Speculative versions of the signals do not factor in exceptions and whether the instruction
|
||||
// is done yet. These are used to get correct values for instructions reading the relevant
|
||||
// performance counters in the ID stage.
|
||||
assign perf_instr_ret_wb_spec_o = wb_count_q;
|
||||
assign perf_instr_ret_compressed_wb_spec_o = perf_instr_ret_wb_spec_o & wb_compressed_q;
|
||||
assign perf_instr_ret_wb_o = instr_done_wb_o & wb_count_q &
|
||||
~(lsu_resp_valid_i & lsu_resp_err_i);
|
||||
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & wb_compressed_q;
|
||||
|
||||
// Forward data that will be written to the RF back to ID to resolve data hazards. The flopped
|
||||
// rf_wdata_wb_q is used rather than rf_wdata_wb_o as the latter includes read data from memory
|
||||
|
@ -159,10 +166,14 @@ module ibex_wb_stage #(
|
|||
assign rf_wdata_wb_mux[0] = rf_wdata_id_i;
|
||||
assign rf_wdata_wb_mux_we[0] = rf_we_id_i;
|
||||
|
||||
// Increment instruction retire counters for valid instructions which are not lsu errors
|
||||
assign perf_instr_ret_wb_o = instr_perf_count_id_i & en_wb_i &
|
||||
~(lsu_resp_valid_i & lsu_resp_err_i);
|
||||
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
|
||||
// Increment instruction retire counters for valid instructions which are not lsu errors.
|
||||
// The speculative signals are always 0 when no writeback stage is present as the raw counter
|
||||
// values will be correct.
|
||||
assign perf_instr_ret_wb_spec_o = 1'b0;
|
||||
assign perf_instr_ret_compressed_wb_spec_o = 1'b0;
|
||||
assign perf_instr_ret_wb_o = instr_perf_count_id_i & en_wb_i &
|
||||
~(lsu_resp_valid_i & lsu_resp_err_i);
|
||||
assign perf_instr_ret_compressed_wb_o = perf_instr_ret_wb_o & instr_is_compressed_id_i;
|
||||
|
||||
// ready needs to be constant 1 without writeback stage (otherwise ID/EX stage will stall)
|
||||
assign ready_wb_o = 1'b1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue