mirror of
https://github.com/openhwgroup/cve2.git
synced 2025-04-22 13:07:46 -04:00
Add performance counters
This commit is contained in:
parent
8c4a99b5ec
commit
d99621f699
4 changed files with 285 additions and 25 deletions
|
@ -135,7 +135,13 @@ module controller
|
|||
output logic stall_if_o, // Stall IF stage (deassert requests)
|
||||
output logic stall_id_o, // Stall ID stage (and instr and data memory interface) ( ID_STAGE )
|
||||
output logic stall_ex_o, // Stall ex stage ( EX_STAGE )
|
||||
output logic stall_wb_o // Stall write to register file due contentions ( WB_STAGE )
|
||||
output logic stall_wb_o, // Stall write to register file due contentions ( WB_STAGE )
|
||||
|
||||
// Performance Counters
|
||||
output logic perf_jump_o, // we are executing a jump instruction (j, jr, jal, jalr)
|
||||
output logic perf_branch_o, // we are executing a branch instruction (bf, bnf)
|
||||
output logic perf_jr_stall_o, // stall due to jump-register-hazard
|
||||
output logic perf_ld_stall_o // stall due to load-use-hazard
|
||||
);
|
||||
|
||||
// FSM state encoding
|
||||
|
@ -159,6 +165,7 @@ module controller
|
|||
logic data_we;
|
||||
logic data_req;
|
||||
logic [1:0] jump_in_id;
|
||||
logic [1:0] csr_op;
|
||||
logic deassert_we;
|
||||
|
||||
logic lsu_stall;
|
||||
|
@ -220,7 +227,7 @@ module controller
|
|||
immediate_mux_sel_o = `IMM_I;
|
||||
|
||||
csr_access_o = 1'b0;
|
||||
csr_op_o = `CSR_OP_NONE;
|
||||
csr_op = `CSR_OP_NONE;
|
||||
|
||||
data_we = 1'b0;
|
||||
data_type_o = 2'b00;
|
||||
|
@ -898,9 +905,9 @@ module controller
|
|||
end
|
||||
|
||||
unique case (instr_rdata_i[13:12])
|
||||
2'b01: csr_op_o = `CSR_OP_WRITE;
|
||||
2'b10: csr_op_o = `CSR_OP_SET;
|
||||
2'b11: csr_op_o = `CSR_OP_CLEAR;
|
||||
2'b01: csr_op = `CSR_OP_WRITE;
|
||||
2'b10: csr_op = `CSR_OP_SET;
|
||||
2'b11: csr_op = `CSR_OP_CLEAR;
|
||||
default: illegal_insn_int = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
@ -1242,6 +1249,7 @@ module controller
|
|||
assign regfile_alu_we_o = (deassert_we) ? 1'b0 : regfile_alu_we;
|
||||
assign data_we_o = (deassert_we) ? 1'b0 : data_we;
|
||||
assign data_req_o = (deassert_we) ? 1'b0 : data_req;
|
||||
assign csr_op_o = (deassert_we) ? `CSR_OP_NONE : csr_op;
|
||||
assign jump_in_id_o = (deassert_we) ? `BRANCH_NONE : jump_in_id;
|
||||
|
||||
|
||||
|
@ -1350,4 +1358,10 @@ module controller
|
|||
end
|
||||
end
|
||||
|
||||
// Performance counter event outputs.
// The jump/branch events are decoded from jump_in_id_o, i.e. the jump type
// after it has been gated by deassert_we; the stall events simply forward
// the hazard signals.
assign perf_jump_o     = jump_in_id_o inside {`BRANCH_JAL, `BRANCH_JALR};
assign perf_branch_o   = (jump_in_id_o == `BRANCH_COND);

assign perf_jr_stall_o = jr_stall;    // jump-register hazard
assign perf_ld_stall_o = load_stall;  // load-use hazard
|
||||
|
||||
endmodule // controller
|
||||
|
|
227
cs_registers.sv
227
cs_registers.sv
|
@ -26,6 +26,9 @@
|
|||
|
||||
|
||||
module cs_registers
|
||||
#(
|
||||
parameter N_EXT_PERF_COUNTERS = 0
|
||||
)
|
||||
(
|
||||
// Clock and Reset
|
||||
input logic clk,
|
||||
|
@ -57,9 +60,49 @@ module cs_registers
|
|||
output logic [31:0] hwlp_end_o,
|
||||
output logic [31:0] hwlp_counter_o,
|
||||
output logic [1:0] hwlp_regid_o,
|
||||
output logic [2:0] hwlp_we_o
|
||||
output logic [2:0] hwlp_we_o,
|
||||
|
||||
// Performance Counters
|
||||
input logic stall_id_i, // Stall ID stage
|
||||
|
||||
input logic instr_fetch_i, // instruction fetch
|
||||
|
||||
input logic jump_i, // jump instruction seen (j, jr, jal, jalr)
|
||||
input logic branch_i, // branch instruction seen (bf, bnf)
|
||||
input logic ld_stall_i, // load use hazard
|
||||
input logic jr_stall_i, // jump register use hazard
|
||||
|
||||
input logic mem_load_i, // load from memory in this cycle
|
||||
input logic mem_store_i, // store to memory in this cycle
|
||||
|
||||
input logic [N_EXT_PERF_COUNTERS-1:0] ext_counters_i
|
||||
);
|
||||
|
||||
localparam N_PERF_COUNTERS = 9 + N_EXT_PERF_COUNTERS;
|
||||
|
||||
`ifdef PULP_FPGA_EMUL
|
||||
localparam N_PERF_REGS = N_PERF_COUNTERS;
|
||||
`elsif SYNTHESIS
|
||||
localparam N_PERF_REGS = 1;
|
||||
`else
|
||||
localparam N_PERF_REGS = N_PERF_COUNTERS;
|
||||
`endif
|
||||
|
||||
// Performance Counter Signals
|
||||
logic stall_id_q;
|
||||
logic [N_PERF_COUNTERS-1:0] PCCR_in; // input signals for each counter category
|
||||
logic [N_PERF_COUNTERS-1:0] PCCR_inc, PCCR_inc_q; // should the counter be increased?
|
||||
|
||||
logic [N_PERF_REGS-1:0] [31:0] PCCR_q, PCCR_n; // performance counters counter register
|
||||
logic [1:0] PCMR_n, PCMR_q; // mode register, controls saturation and global enable
|
||||
logic [N_PERF_COUNTERS-1:0] PCER_n, PCER_q; // selected counter input
|
||||
|
||||
logic [31:0] perf_rdata;
|
||||
logic [4:0] pccr_index;
|
||||
logic pccr_all_sel;
|
||||
logic is_pccr;
|
||||
logic is_pcer;
|
||||
logic is_pcmr;
|
||||
|
||||
logic is_constant;
|
||||
logic is_register;
|
||||
|
@ -88,6 +131,8 @@ module cs_registers
|
|||
csr_rdata_o = constant_rdata_int;
|
||||
else if (is_register == 1'b1)
|
||||
csr_rdata_o = register_rdata_int;
|
||||
else // must be performance counter
|
||||
csr_rdata_o = perf_rdata;
|
||||
end
|
||||
|
||||
|
||||
|
@ -108,17 +153,16 @@ module cs_registers
|
|||
// address decoder for regular CSRs
|
||||
always_comb
|
||||
begin
|
||||
csr_index = '0;
|
||||
is_register = 1'b1;
|
||||
csr_index = '0;
|
||||
is_register = 1'b1;
|
||||
unique case (csr_addr_i)
|
||||
12'h340: csr_index = `CSR_IDX_MSCRATCH;
|
||||
12'h341: csr_index = `CSR_IDX_MEPC;
|
||||
12'h340: csr_index = `CSR_IDX_MSCRATCH;
|
||||
12'h341: csr_index = `CSR_IDX_MEPC;
|
||||
|
||||
default: is_register = 1'b0;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign register_rdata_int = csr[csr_index];
|
||||
|
||||
|
||||
// directly output some registers
|
||||
|
@ -135,7 +179,7 @@ module cs_registers
|
|||
else
|
||||
begin
|
||||
// write CSR through instruction
|
||||
if (is_readonly == 1'b0) begin
|
||||
if (is_readonly == 1'b0 && is_pccr == 1'b0) begin
|
||||
unique case (csr_op_i)
|
||||
`CSR_OP_NONE: ;
|
||||
`CSR_OP_WRITE: csr[csr_index] <= csr_wdata_i;
|
||||
|
@ -155,9 +199,170 @@ module cs_registers
|
|||
end
|
||||
|
||||
|
||||
// synopsys translate_off
|
||||
// make sure decoding works correctly
|
||||
//assert property (!((is_constant == 1'b1) && (is_register == 1'b1))); // not supported by ModelSim :/
|
||||
// synopsys translate_on
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// ____ __ ____ _ //
|
||||
// | _ \ ___ _ __ / _| / ___|___ _ _ _ __ | |_ ___ _ __ //
|
||||
// | |_) / _ \ '__| |_ | | / _ \| | | | '_ \| __/ _ \ '__| //
|
||||
// | __/ __/ | | _| | |__| (_) | |_| | | | | || __/ | //
|
||||
// |_| \___|_| |_|(_) \____\___/ \__,_|_| |_|\__\___|_| //
|
||||
// //
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Event sources of the nine built-in performance counters; bit k of PCCR_in
// feeds counter k.
//
// Hazard, jump and branch events are qualified with the registered ID stall
// (stall_id_q), so they are only seen in a cycle that follows a non-stalled
// ID stage (presumably so that one stalled instruction is counted once).
logic id_not_stalled_q;
assign id_not_stalled_q = ~stall_id_q;

assign PCCR_in[0] = 1'b1;                           // cycle counter
assign PCCR_in[1] = ~stall_id_i;                    // instruction counter
assign PCCR_in[2] = ld_stall_i & id_not_stalled_q;  // nr of load use hazards
assign PCCR_in[3] = jr_stall_i & id_not_stalled_q;  // nr of jump register hazards
assign PCCR_in[4] = instr_fetch_i;                  // cycles waiting for instruction fetches
assign PCCR_in[5] = mem_load_i;                     // nr of loads
assign PCCR_in[6] = mem_store_i;                    // nr of stores
assign PCCR_in[7] = jump_i   & id_not_stalled_q;    // nr of jumps (unconditional)
assign PCCR_in[8] = branch_i & id_not_stalled_q;    // nr of branches (conditional)
|
||||
|
||||
// External event lines occupy the counter inputs directly above the built-in
// ones: ext_counters_i[k] drives PCCR_in[N_PERF_COUNTERS - N_EXT_PERF_COUNTERS + k].
// With N_EXT_PERF_COUNTERS == 0 the loop generates nothing.
generate
  for (genvar ext = 0; ext < N_EXT_PERF_COUNTERS; ext++) begin : EXT_PERF_CNT
    assign PCCR_in[N_PERF_COUNTERS - N_EXT_PERF_COUNTERS + ext] = ext_counters_i[ext];
  end
endgenerate
|
||||
|
||||
// Address decoder for the performance counter CSRs.
//
//   0x7A0          PCER - event enable bits, one per counter input
//   0x7A1          PCMR - mode register (bit 0: global enable, bit 1: saturation)
//   0x780 .. 0x79F PCCR - counter registers, index = csr_addr_i[4:0]
//   0x79F          additionally raises pccr_all_sel (access to all counters)
//
// Drives the is_pcer / is_pcmr / is_pccr select flags, the counter index and
// the read data of the addressed register. perf_rdata is 0 when no
// performance counter register is addressed or the addressed counter does
// not exist.
always_comb
begin
  is_pccr      = 1'b0;
  is_pcmr      = 1'b0;
  is_pcer      = 1'b0;
  pccr_all_sel = 1'b0;
  pccr_index   = '0;
  perf_rdata   = '0;

  unique case (csr_addr_i)
    12'h7A0: begin
      is_pcer = 1'b1;
      perf_rdata[N_PERF_COUNTERS-1:0] = PCER_q;
    end
    12'h7A1: begin
      is_pcmr = 1'b1;
      perf_rdata[1:0] = PCMR_q;
    end
    12'h79F: begin
      is_pccr      = 1'b1;
      pccr_all_sel = 1'b1;
    end
    default: ;
  endcase

  // look for 780 to 79F, Performance Counter Counter Registers
  if (csr_addr_i[11:5] == 7'b0111100) begin
    is_pccr    = 1'b1;
    pccr_index = csr_addr_i[4:0];

    // Only index PCCR_q with a counter that is actually implemented; an
    // out-of-range select (e.g. 0x79F with fewer than 32 counters) would
    // otherwise read X.
    if (N_PERF_REGS == 1)
      // single shared counter: every PCCR address writes counter 0, so every
      // PCCR address reads it back as well
      perf_rdata = PCCR_q[0];
    else if (csr_addr_i[4:0] < N_PERF_REGS)
      perf_rdata = PCCR_q[csr_addr_i[4:0]];
  end
end
|
||||
|
||||
|
||||
// Performance counter next-state logic.
//
// A counter is incremented one cycle after its event was seen (PCCR_inc is
// registered into PCCR_inc_q). With PCMR_q[1] set the counter saturates at
// 32'hFFFFFFFF, otherwise it wraps. A CSR access to the counter in the same
// cycle takes priority over the increment.
//
// The implementation is selected by the number of counter registers rather
// than by a separate `ifdef, so it always agrees with how N_PERF_REGS was
// chosen.
generate
  if (N_PERF_REGS == 1) begin : PERF_CNT_SINGLE
    // Only one counter register exists: it advances whenever any enabled
    // event fires while the global enable PCMR_q[0] is set.
    assign PCCR_inc[0] = (|(PCCR_in & PCER_q)) & PCMR_q[0];
    assign PCCR_inc[N_PERF_COUNTERS-1:1] = '0;  // no registers behind these bits

    always_comb
    begin
      PCCR_n[0] = PCCR_q[0];

      if ((PCCR_inc_q[0] == 1'b1) && ((PCCR_q[0] != 32'hFFFFFFFF) || (PCMR_q[1] == 1'b0)))
        PCCR_n[0] = PCCR_q[0] + 1;

      // every PCCR address maps onto the single counter
      if (is_pccr == 1'b1) begin
        unique case (csr_op_i)
          `CSR_OP_NONE:  ;
          `CSR_OP_WRITE: PCCR_n[0] = csr_wdata_i;
          `CSR_OP_SET:   PCCR_n[0] = PCCR_q[0] | csr_wdata_i;
          `CSR_OP_CLEAR: PCCR_n[0] = PCCR_q[0] & ~csr_wdata_i;  // clear the bits set in wdata
          default:       ;
        endcase
      end
    end
  end else begin : PERF_CNT_MULTI
    always_comb
    begin
      for (int i = 0; i < N_PERF_COUNTERS; i++)
      begin : PERF_CNT_INC
        // event k counts only if enabled in PCER and globally in PCMR
        PCCR_inc[i] = PCCR_in[i] & PCER_q[i] & PCMR_q[0];

        PCCR_n[i] = PCCR_q[i];

        if ((PCCR_inc_q[i] == 1'b1) && ((PCCR_q[i] != 32'hFFFFFFFF) || (PCMR_q[1] == 1'b0)))
          PCCR_n[i] = PCCR_q[i] + 1;

        // CSR access: either this counter is addressed or all are selected
        if (is_pccr == 1'b1 && (pccr_all_sel == 1'b1 || pccr_index == i)) begin
          unique case (csr_op_i)
            `CSR_OP_NONE:  ;
            `CSR_OP_WRITE: PCCR_n[i] = csr_wdata_i;
            `CSR_OP_SET:   PCCR_n[i] = PCCR_q[i] | csr_wdata_i;
            `CSR_OP_CLEAR: PCCR_n[i] = PCCR_q[i] & ~csr_wdata_i;  // clear the bits set in wdata
            default:       ;
          endcase
        end
      end
    end
  end
endgenerate
|
||||
|
||||
// Next-state logic of the mode register (PCMR) and the event enable
// register (PCER). Both hold their value unless the address decoder selects
// them (is_pcmr / is_pcer) and csr_op_i requests a write, set or clear.
// Blocking assignments are used because this is combinational logic.
always_comb
begin
  PCMR_n = PCMR_q;
  PCER_n = PCER_q;

  if (is_pcmr) begin
    unique case (csr_op_i)
      `CSR_OP_NONE:  ;
      `CSR_OP_WRITE: PCMR_n = csr_wdata_i;
      `CSR_OP_SET:   PCMR_n = PCMR_q | csr_wdata_i;
      `CSR_OP_CLEAR: PCMR_n = PCMR_q & ~csr_wdata_i;  // clear the bits set in wdata
      default:       ;
    endcase
  end

  if (is_pcer) begin
    unique case (csr_op_i)
      `CSR_OP_NONE:  ;
      `CSR_OP_WRITE: PCER_n = csr_wdata_i;
      `CSR_OP_SET:   PCER_n = PCER_q | csr_wdata_i;
      `CSR_OP_CLEAR: PCER_n = PCER_q & ~csr_wdata_i;  // clear the bits set in wdata
      default:       ;
    endcase
  end
end
|
||||
|
||||
// Performance counter state registers (asynchronous active-low reset).
//
// After reset all counters and event enables are zero and PCMR is 2'b11,
// i.e. both mode bits are set. Otherwise every register takes its
// combinationally computed next value; stall_id_q and PCCR_inc_q are
// one-cycle delayed copies of stall_id_i and PCCR_inc.
always_ff @(posedge clk or negedge rst_n)
begin
  if (!rst_n) begin
    stall_id_q <= 1'b0;

    PCER_q     <= '0;
    PCMR_q     <= 2'b11;

    // only the N_PERF_REGS implemented counters have state
    PCCR_q                      <= '0;
    PCCR_inc_q[N_PERF_REGS-1:0] <= '0;
  end else begin
    stall_id_q <= stall_id_i;

    PCER_q     <= PCER_n;
    PCMR_q     <= PCMR_n;

    PCCR_q                      <= PCCR_n;
    PCCR_inc_q[N_PERF_REGS-1:0] <= PCCR_inc[N_PERF_REGS-1:0];
  end
end
|
||||
|
||||
endmodule
|
||||
|
|
19
id_stage.sv
19
id_stage.sv
|
@ -149,13 +149,17 @@ module id_stage
|
|||
|
||||
input logic [4:0] regfile_alu_waddr_fw_i,
|
||||
input logic regfile_alu_we_fw_i,
|
||||
input logic [31:0] regfile_alu_wdata_fw_i
|
||||
input logic [31:0] regfile_alu_wdata_fw_i,
|
||||
|
||||
`ifdef TCDM_ADDR_PRECAL
|
||||
,
|
||||
output logic [31:0] alu_adder_o
|
||||
output logic [31:0] alu_adder_o,
|
||||
`endif
|
||||
|
||||
// Performance Counters
|
||||
output logic perf_jump_o, // we are executing a jump instruction (j, jr, jal, jalr)
|
||||
output logic perf_branch_o, // we are executing a branch instruction (bf, bnf)
|
||||
output logic perf_jr_stall_o, // jump-register-hazard
|
||||
output logic perf_ld_stall_o // load-use-hazard
|
||||
);
|
||||
|
||||
|
||||
|
@ -643,7 +647,14 @@ module id_stage
|
|||
.stall_if_o ( stall_if_o ),
|
||||
.stall_id_o ( stall_id_o ),
|
||||
.stall_ex_o ( stall_ex_o ),
|
||||
.stall_wb_o ( stall_wb_o )
|
||||
.stall_wb_o ( stall_wb_o ),
|
||||
|
||||
// Performance Counters
|
||||
.perf_jump_o ( perf_jump_o ),
|
||||
.perf_branch_o ( perf_branch_o ),
|
||||
.perf_jr_stall_o ( perf_jr_stall_o ),
|
||||
.perf_ld_stall_o ( perf_ld_stall_o )
|
||||
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -29,6 +29,9 @@
|
|||
|
||||
|
||||
module riscv_core
|
||||
#(
|
||||
parameter N_EXT_PERF_COUNTERS = 0
|
||||
)
|
||||
(
|
||||
// Clock and Reset
|
||||
input logic clk,
|
||||
|
@ -72,7 +75,9 @@ module riscv_core
|
|||
|
||||
// CPU Control Signals
|
||||
input logic fetch_enable_i,
|
||||
output logic core_busy_o
|
||||
output logic core_busy_o,
|
||||
|
||||
input logic [N_EXT_PERF_COUNTERS-1:0] ext_perf_counters_i
|
||||
);
|
||||
|
||||
|
||||
|
@ -232,6 +237,12 @@ module riscv_core
|
|||
logic [31:0] alu_adder_ex;
|
||||
`endif
|
||||
|
||||
// Performance Counters
|
||||
logic perf_jump;
|
||||
logic perf_branch;
|
||||
logic perf_jr_stall;
|
||||
logic perf_ld_stall;
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
|
@ -419,11 +430,15 @@ module riscv_core
|
|||
|
||||
.regfile_waddr_wb_i ( regfile_waddr_fw_wb_o ), // Write address ex-wb pipeline
|
||||
.regfile_we_wb_i ( regfile_we_wb ), // write enable for the register file
|
||||
.regfile_wdata_wb_i ( regfile_wdata ) // write data to commit in the register file
|
||||
.regfile_wdata_wb_i ( regfile_wdata ), // write data to commit in the register file
|
||||
`ifdef TCDM_ADDR_PRECAL
|
||||
,
|
||||
.alu_adder_o ( alu_adder_ex )
|
||||
.alu_adder_o ( alu_adder_ex ),
|
||||
`endif
|
||||
|
||||
.perf_jump_o ( perf_jump ),
|
||||
.perf_branch_o ( perf_branch ),
|
||||
.perf_jr_stall_o ( perf_jr_stall ),
|
||||
.perf_ld_stall_o ( perf_ld_stall )
|
||||
);
|
||||
|
||||
|
||||
|
@ -609,7 +624,22 @@ module riscv_core
|
|||
.curr_pc_id_i ( current_pc_id ), // from IF stage
|
||||
.save_pc_if_i ( save_pc_if ),
|
||||
.save_pc_id_i ( save_pc_id ),
|
||||
.epcr_o ( epcr )
|
||||
.epcr_o ( epcr ),
|
||||
|
||||
// performance counter related signals
|
||||
.stall_id_i ( stall_id ),
|
||||
|
||||
.instr_fetch_i ( ~instr_ack_int ),
|
||||
|
||||
.jump_i ( perf_jump ),
|
||||
.branch_i ( perf_branch ),
|
||||
.ld_stall_i ( perf_ld_stall ),
|
||||
.jr_stall_i ( perf_jr_stall ),
|
||||
|
||||
.mem_load_i ( data_req_o & data_gnt_i & (~data_we_o) ),
|
||||
.mem_store_i ( data_req_o & data_gnt_i & data_we_o ),
|
||||
|
||||
.ext_counters_i ( ext_perf_counters_i )
|
||||
);
|
||||
|
||||
// Mux for SPR access through Debug Unit
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue