Add performance counters

This commit is contained in:
Andreas Traber 2015-08-28 09:57:37 +02:00
parent 8c4a99b5ec
commit d99621f699
4 changed files with 285 additions and 25 deletions

View file

@ -135,7 +135,13 @@ module controller
output logic stall_if_o, // Stall IF stage (deassert requests)
output logic stall_id_o, // Stall ID stage (and instr and data memory interface) ( ID_STAGE )
output logic stall_ex_o, // Stall ex stage ( EX_STAGE )
output logic stall_wb_o // Stall write to register file due contentions ( WB_STAGE )
output logic stall_wb_o, // Stall write to register file due contentions ( WB_STAGE )
// Performance Counters
output logic perf_jump_o, // we are executing a jump instruction (j, jr, jal, jalr)
output logic perf_branch_o, // we are executing a branch instruction (bf, bnf)
output logic perf_jr_stall_o, // stall due to jump-register-hazard
output logic perf_ld_stall_o // stall due to load-use-hazard
);
// FSM state encoding
@ -159,6 +165,7 @@ module controller
logic data_we;
logic data_req;
logic [1:0] jump_in_id;
logic [1:0] csr_op;
logic deassert_we;
logic lsu_stall;
@ -220,7 +227,7 @@ module controller
immediate_mux_sel_o = `IMM_I;
csr_access_o = 1'b0;
csr_op_o = `CSR_OP_NONE;
csr_op = `CSR_OP_NONE;
data_we = 1'b0;
data_type_o = 2'b00;
@ -898,9 +905,9 @@ module controller
end
unique case (instr_rdata_i[13:12])
2'b01: csr_op_o = `CSR_OP_WRITE;
2'b10: csr_op_o = `CSR_OP_SET;
2'b11: csr_op_o = `CSR_OP_CLEAR;
2'b01: csr_op = `CSR_OP_WRITE;
2'b10: csr_op = `CSR_OP_SET;
2'b11: csr_op = `CSR_OP_CLEAR;
default: illegal_insn_int = 1'b1;
endcase
end
@ -1242,6 +1249,7 @@ module controller
assign regfile_alu_we_o = (deassert_we) ? 1'b0 : regfile_alu_we;
assign data_we_o = (deassert_we) ? 1'b0 : data_we;
assign data_req_o = (deassert_we) ? 1'b0 : data_req;
assign csr_op_o = (deassert_we) ? `CSR_OP_NONE : csr_op;
assign jump_in_id_o = (deassert_we) ? `BRANCH_NONE : jump_in_id;
@ -1350,4 +1358,10 @@ module controller
end
end
// Performance counter event outputs.
// The hazard strobes forward the internal stall signals unchanged; the
// jump/branch strobes are decoded from the gated jump_in_id_o output.
assign perf_ld_stall_o = load_stall;
assign perf_jr_stall_o = jr_stall;
assign perf_branch_o   = (jump_in_id_o == `BRANCH_COND);
assign perf_jump_o     = (jump_in_id_o == `BRANCH_JAL) || (jump_in_id_o == `BRANCH_JALR);
endmodule // controller

View file

@ -26,6 +26,9 @@
module cs_registers
#(
parameter N_EXT_PERF_COUNTERS = 0
)
(
// Clock and Reset
input logic clk,
@ -57,9 +60,49 @@ module cs_registers
output logic [31:0] hwlp_end_o,
output logic [31:0] hwlp_counter_o,
output logic [1:0] hwlp_regid_o,
output logic [2:0] hwlp_we_o
output logic [2:0] hwlp_we_o,
// Performance Counters
input logic stall_id_i, // Stall ID stage
input logic instr_fetch_i, // instruction fetch
input logic jump_i, // jump instruction seen (j, jr, jal, jalr)
input logic branch_i, // branch instruction seen (bf, bnf)
input logic ld_stall_i, // load use hazard
input logic jr_stall_i, // jump register use hazard
input logic mem_load_i, // load from memory in this cycle
input logic mem_store_i, // store to memory in this cycle
input logic [N_EXT_PERF_COUNTERS-1:0] ext_counters_i
);
localparam N_PERF_COUNTERS = 9 + N_EXT_PERF_COUNTERS;
`ifdef PULP_FPGA_EMUL
localparam N_PERF_REGS = N_PERF_COUNTERS;
`elsif SYNTHESIS
localparam N_PERF_REGS = 1;
`else
localparam N_PERF_REGS = N_PERF_COUNTERS;
`endif
// Performance Counter Signals
logic stall_id_q;
logic [N_PERF_COUNTERS-1:0] PCCR_in; // input signals for each counter category
logic [N_PERF_COUNTERS-1:0] PCCR_inc, PCCR_inc_q; // should the counter be increased?
logic [N_PERF_REGS-1:0] [31:0] PCCR_q, PCCR_n; // performance counters counter register
logic [1:0] PCMR_n, PCMR_q; // mode register, controls saturation and global enable
logic [N_PERF_COUNTERS-1:0] PCER_n, PCER_q; // selected counter input
logic [31:0] perf_rdata;
logic [4:0] pccr_index;
logic pccr_all_sel;
logic is_pccr;
logic is_pcer;
logic is_pcmr;
logic is_constant;
logic is_register;
@ -88,6 +131,8 @@ module cs_registers
csr_rdata_o = constant_rdata_int;
else if (is_register == 1'b1)
csr_rdata_o = register_rdata_int;
else // must be performance counter
csr_rdata_o = perf_rdata;
end
@ -108,17 +153,16 @@ module cs_registers
// address decoder for regular CSRs
// Maps csr_addr_i to an index into the csr[] register array and flags
// whether the address hit one of the regular (read/write) CSRs.
// NOTE(review): the default assignments and the case items below each
// appear twice; this looks like old/new line residue from a diff view -
// confirm against the real file before relying on this block.
always_comb
begin
// defaults: index 0 and "hit", overridden by the case default on a miss
csr_index = '0;
is_register = 1'b1;
csr_index = '0;
is_register = 1'b1;
unique case (csr_addr_i)
12'h340: csr_index = `CSR_IDX_MSCRATCH;
12'h341: csr_index = `CSR_IDX_MEPC;
12'h340: csr_index = `CSR_IDX_MSCRATCH;
12'h341: csr_index = `CSR_IDX_MEPC;
// any other address is not a regular CSR
default: is_register = 1'b0;
endcase
end
assign register_rdata_int = csr[csr_index];
// directly output some registers
@ -135,7 +179,7 @@ module cs_registers
else
begin
// write CSR through instruction
if (is_readonly == 1'b0) begin
if (is_readonly == 1'b0 && is_pccr == 1'b0) begin
unique case (csr_op_i)
`CSR_OP_NONE: ;
`CSR_OP_WRITE: csr[csr_index] <= csr_wdata_i;
@ -155,9 +199,170 @@ module cs_registers
end
// synopsys translate_off
// make sure decoding works correctly
//assert property (!((is_constant == 1'b1) && (is_register == 1'b1))); // not supported by ModelSim :/
// synopsys translate_on
/////////////////////////////////////////////////////////////////
// ____ __ ____ _ //
// | _ \ ___ _ __ / _| / ___|___ _ _ _ __ | |_ ___ _ __ //
// | |_) / _ \ '__| |_ | | / _ \| | | | '_ \| __/ _ \ '__| //
// | __/ __/ | | _| | |__| (_) | |_| | | | | || __/ | //
// |_| \___|_| |_|(_) \____\___/ \__,_|_| |_|\__\___|_| //
// //
/////////////////////////////////////////////////////////////////
// Event inputs of the nine built-in counter categories, packed MSB first.
// Events qualified with ~stall_id_q are only seen when the registered copy
// of stall_id_i is low.
assign PCCR_in[8:0] = {
  branch_i   & (~stall_id_q), // [8] nr of branches (conditional)
  jump_i     & (~stall_id_q), // [7] nr of jumps (unconditional)
  mem_store_i,                // [6] nr of stores
  mem_load_i,                 // [5] nr of loads
  instr_fetch_i,              // [4] cycles waiting for instruction fetches
  jr_stall_i & (~stall_id_q), // [3] nr of jump register hazards
  ld_stall_i & (~stall_id_q), // [2] nr of load use hazards
  ~stall_id_i,                // [1] instruction counter
  1'b1                        // [0] cycle counter
};
// External event inputs occupy the PCCR_in bits directly above the
// built-in categories, one bit per external counter.
generate
  genvar i;
  for (i = 0; i < N_EXT_PERF_COUNTERS; i = i + 1) begin : ext_perf_cnt_in
    assign PCCR_in[(N_PERF_COUNTERS - N_EXT_PERF_COUNTERS) + i] = ext_counters_i[i];
  end
endgenerate
// Address decoder and read mux for the performance counter CSRs.
//   0x7A0        PCER  - per-category event enable bits
//   0x7A1        PCMR  - mode register (bit 0 global enable, bit 1 saturation)
//   0x780..0x79F PCCR  - counter registers; 0x79F additionally selects all
//                        counters for a write (pccr_all_sel)
// Outputs: is_pcer / is_pcmr / is_pccr hit flags, pccr_index (selected
// counter), pccr_all_sel, and perf_rdata (read value, zero if no hit).
always_comb
begin
  is_pccr      = 1'b0;
  is_pcmr      = 1'b0;
  is_pcer      = 1'b0;
  pccr_all_sel = 1'b0;
  pccr_index   = '0;
  perf_rdata   = '0;

  unique case (csr_addr_i)
    12'h7A0: begin
      is_pcer = 1'b1;
      perf_rdata[N_PERF_COUNTERS-1:0] = PCER_q;
    end

    12'h7A1: begin
      is_pcmr = 1'b1;
      perf_rdata[1:0] = PCMR_q;
    end

    12'h79F: begin
      is_pccr      = 1'b1;
      pccr_all_sel = 1'b1;
    end

    default:;
  endcase

  // look for 780 to 79F, Performance Counter Counter Registers
  // (0x79F also matches here, so it ends up with pccr_index = 31)
  if (csr_addr_i[11:5] == 7'b0111100) begin
    is_pccr    = 1'b1;
    pccr_index = csr_addr_i[4:0];

    // Only N_PERF_REGS counter registers physically exist. Indexing PCCR_q
    // past that range reads X, so guard the access.
    if (N_PERF_REGS == 1)
      // single shared counter: every PCCR address aliases register 0
      perf_rdata = PCCR_q[0];
    else if (csr_addr_i[4:0] < N_PERF_REGS)
      perf_rdata = PCCR_q[csr_addr_i[4:0]];
    else
      // unimplemented counter reads as zero
      perf_rdata = '0;
  end
end
// performance counter counter update logic
`ifdef SYNTHESIS
// for synthesis we just have one performance counter register
// PCCR_q[0] counts whenever any enabled event category fires (OR of all
// enabled inputs) and the global enable PCMR_q[0] is set.
assign PCCR_inc[0] = (|(PCCR_in & PCER_q)) & PCMR_q[0];

// Next-state logic of the single counter:
//  - increment on the registered event strobe, unless saturation is enabled
//    (PCMR_q[1]) and the counter is already at its maximum;
//  - a CSR access to any PCCR address overrides the increment.
always_comb
begin
  PCCR_n[0] = PCCR_q[0];

  if ((PCCR_inc_q[0] == 1'b1) && ((PCCR_q[0] != 32'hFFFFFFFF) || (PCMR_q[1] == 1'b0)))
    PCCR_n[0] = PCCR_q[0] + 1;

  if (is_pccr == 1'b1) begin
    // Blocking assignments only: this is combinational logic and PCCR_n[0]
    // already has a blocking default above.
    unique case (csr_op_i)
      `CSR_OP_NONE:  ;
      `CSR_OP_WRITE: PCCR_n[0] = csr_wdata_i;
      `CSR_OP_SET:   PCCR_n[0] = csr_wdata_i | PCCR_q[0];
      // clear = drop the bits that are set in the write data
      `CSR_OP_CLEAR: PCCR_n[0] = PCCR_q[0] & ~csr_wdata_i;
    endcase
  end
end
`else
// Next-state logic for the full set of counters (one register per category).
// For each counter i:
//  - PCCR_inc[i] is the event strobe, gated by its enable bit PCER_q[i] and
//    the global enable PCMR_q[0];
//  - the counter increments on the registered strobe PCCR_inc_q[i], unless
//    saturation is enabled (PCMR_q[1]) and it is already at its maximum;
//  - a CSR access addressing this counter (or all counters through
//    pccr_all_sel) overrides the increment.
always_comb
begin
  for (int i = 0; i < N_PERF_COUNTERS; i++)
  begin : PERF_CNT_INC
    PCCR_inc[i] = PCCR_in[i] & PCER_q[i] & PCMR_q[0];

    PCCR_n[i] = PCCR_q[i];

    if ((PCCR_inc_q[i] == 1'b1) && ((PCCR_q[i] != 32'hFFFFFFFF) || (PCMR_q[1] == 1'b0)))
      PCCR_n[i] = PCCR_q[i] + 1;

    if (is_pccr == 1'b1 && (pccr_all_sel == 1'b1 || pccr_index == i)) begin
      // Blocking assignments only: mixing blocking and nonblocking writes to
      // the same variable inside always_comb is not allowed.
      unique case (csr_op_i)
        `CSR_OP_NONE:  ;
        `CSR_OP_WRITE: PCCR_n[i] = csr_wdata_i;
        `CSR_OP_SET:   PCCR_n[i] = csr_wdata_i | PCCR_q[i];
        // clear = drop the bits that are set in the write data
        `CSR_OP_CLEAR: PCCR_n[i] = PCCR_q[i] & ~csr_wdata_i;
      endcase
    end
  end
end
`endif
// update PCMR and PCER
// Next-state logic for the mode register (PCMR) and the event enable
// register (PCER). Both hold their value unless the matching CSR address is
// accessed with a write, set or clear operation.
always_comb
begin
  PCMR_n = PCMR_q;
  PCER_n = PCER_q;

  if (is_pcmr) begin
    // PCMR is two bits wide; only the low bits of the write data are used
    unique case (csr_op_i)
      `CSR_OP_NONE:  ;
      `CSR_OP_WRITE: PCMR_n = csr_wdata_i[1:0];
      `CSR_OP_SET:   PCMR_n = csr_wdata_i[1:0] | PCMR_q;
      // clear = drop the bits that are set in the write data
      `CSR_OP_CLEAR: PCMR_n = PCMR_q & ~csr_wdata_i[1:0];
    endcase
  end

  if (is_pcer) begin
    // assignment to PCER_n keeps only its N_PERF_COUNTERS low bits
    unique case (csr_op_i)
      `CSR_OP_NONE:  ;
      `CSR_OP_WRITE: PCER_n = csr_wdata_i;
      `CSR_OP_SET:   PCER_n = csr_wdata_i | PCER_q;
      // clear = drop the bits that are set in the write data
      `CSR_OP_CLEAR: PCER_n = PCER_q & ~csr_wdata_i;
    endcase
  end
end
// State registers of the performance counter unit.
// Asynchronous active-low reset: counters and event strobes cleared, all
// event categories disabled (PCER = 0), PCMR = 2'b11.
// Only the N_PERF_REGS implemented counters and their strobe bits are
// registered.
always_ff @(posedge clk or negedge rst_n)
begin
  if (!rst_n) begin
    stall_id_q                    <= 1'b0;
    PCER_q                        <= '0;
    PCMR_q                        <= 2'b11;
    PCCR_q                        <= '0;
    PCCR_inc_q[N_PERF_REGS-1:0]   <= '0;
  end else begin
    stall_id_q                    <= stall_id_i;
    PCER_q                        <= PCER_n;
    PCMR_q                        <= PCMR_n;
    PCCR_q                        <= PCCR_n;
    PCCR_inc_q[N_PERF_REGS-1:0]   <= PCCR_inc[N_PERF_REGS-1:0];
  end
end
endmodule

View file

@ -149,13 +149,17 @@ module id_stage
input logic [4:0] regfile_alu_waddr_fw_i,
input logic regfile_alu_we_fw_i,
input logic [31:0] regfile_alu_wdata_fw_i
input logic [31:0] regfile_alu_wdata_fw_i,
`ifdef TCDM_ADDR_PRECAL
,
output logic [31:0] alu_adder_o
output logic [31:0] alu_adder_o,
`endif
// Performance Counters
output logic perf_jump_o, // we are executing a jump instruction (j, jr, jal, jalr)
output logic perf_branch_o, // we are executing a branch instruction (bf, bnf)
output logic perf_jr_stall_o, // jump-register-hazard
output logic perf_ld_stall_o // load-use-hazard
);
@ -643,7 +647,14 @@ module id_stage
.stall_if_o ( stall_if_o ),
.stall_id_o ( stall_id_o ),
.stall_ex_o ( stall_ex_o ),
.stall_wb_o ( stall_wb_o )
.stall_wb_o ( stall_wb_o ),
// Performance Counters
.perf_jump_o ( perf_jump_o ),
.perf_branch_o ( perf_branch_o ),
.perf_jr_stall_o ( perf_jr_stall_o ),
.perf_ld_stall_o ( perf_ld_stall_o )
);
///////////////////////////////////////////////////////////////////////

View file

@ -29,6 +29,9 @@
module riscv_core
#(
parameter N_EXT_PERF_COUNTERS = 0
)
(
// Clock and Reset
input logic clk,
@ -72,7 +75,9 @@ module riscv_core
// CPU Control Signals
input logic fetch_enable_i,
output logic core_busy_o
output logic core_busy_o,
input logic [N_EXT_PERF_COUNTERS-1:0] ext_perf_counters_i
);
@ -232,6 +237,12 @@ module riscv_core
logic [31:0] alu_adder_ex;
`endif
// Performance Counters
logic perf_jump;
logic perf_branch;
logic perf_jr_stall;
logic perf_ld_stall;
//////////////////////////////////////////////////
@ -419,11 +430,15 @@ module riscv_core
.regfile_waddr_wb_i ( regfile_waddr_fw_wb_o ), // Write address ex-wb pipeline
.regfile_we_wb_i ( regfile_we_wb ), // write enable for the register file
.regfile_wdata_wb_i ( regfile_wdata ) // write data to commit in the register file
.regfile_wdata_wb_i ( regfile_wdata ), // write data to commit in the register file
`ifdef TCDM_ADDR_PRECAL
,
.alu_adder_o ( alu_adder_ex )
.alu_adder_o ( alu_adder_ex ),
`endif
.perf_jump_o ( perf_jump ),
.perf_branch_o ( perf_branch ),
.perf_jr_stall_o ( perf_jr_stall ),
.perf_ld_stall_o ( perf_ld_stall )
);
@ -609,7 +624,22 @@ module riscv_core
.curr_pc_id_i ( current_pc_id ), // from IF stage
.save_pc_if_i ( save_pc_if ),
.save_pc_id_i ( save_pc_id ),
.epcr_o ( epcr )
.epcr_o ( epcr ),
// performance counter related signals
.stall_id_i ( stall_id ),
.instr_fetch_i ( ~instr_ack_int ),
.jump_i ( perf_jump ),
.branch_i ( perf_branch ),
.ld_stall_i ( perf_ld_stall ),
.jr_stall_i ( perf_jr_stall ),
.mem_load_i ( data_req_o & data_gnt_i & (~data_we_o) ),
.mem_store_i ( data_req_o & data_gnt_i & data_we_o ),
.ext_counters_i ( ext_perf_counters_i )
);
// Mux for SPR access through Debug Unit