mirror of
https://github.com/lowRISC/ibex.git
synced 2025-04-22 21:07:34 -04:00
Merge pull request #624 from ganoam/fpga-opt-perf-mon-pr
Modify Performance Counter for DSP Inference
This commit is contained in:
parent
3d827e1db1
commit
11a5fc24d4
8 changed files with 198 additions and 47 deletions
|
@ -122,3 +122,17 @@ The remaining event selector CSRs are tied to 0, i.e., no events are counted by
|
|||
+----------------------+-------------+-------------+--------------+
|
||||
| ``mhpmevent10(h)`` | 0x32A | 0x0000_0400 | 10 |
|
||||
+----------------------+-------------+-------------+--------------+
|
||||
|
||||
FPGA Targets
|
||||
------------
|
||||
|
||||
For FPGA targets the performance counters constitute a particularly large structure.
|
||||
Implementing the maximum of 29 event counters at widths of 32, 48 and 64 bits results in relative logic utilizations of the core of 100%, 111% and 129% respectively.
|
||||
The relative numbers of flip-flops are 100%, 125% and 150%.
|
||||
It is recommended to implement event counters of 32 bit width where possible.
|
||||
|
||||
For Xilinx FPGA devices featuring the `DSP48E1` DSP slice or similar, counter logic can be absorbed into the DSP slice for widths up to 48 bits.
|
||||
The resulting relative logic utilizations with respect to the non-DSP 32 bit counter implementation are 83% and 89% respectively for 32 and 48 bit DSP counters.
|
||||
This comes at the expense of 1 DSP slice per counter.
|
||||
For 32 bit counters only, the corresponding flip-flops can be incorporated into the DSP's output pipeline register, resulting in a reduction of the number of flip-flops to 50%.
|
||||
In order to infer DSP slices for performance counters, define the preprocessor variable ``FPGA_XILINX``.
|
||||
|
|
|
@ -21,6 +21,7 @@ ${PRJ_DIR}/ibex/rtl/ibex_alu.sv
|
|||
${PRJ_DIR}/ibex/rtl/ibex_compressed_decoder.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_controller.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_cs_registers.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_counters.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_decoder.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_ex_block.sv
|
||||
${PRJ_DIR}/ibex/rtl/ibex_id_stage.sv
|
||||
|
|
|
@ -31,6 +31,12 @@ parameters:
|
|||
default: "../../../../../examples/sw/led/led.vmem"
|
||||
paramtype: vlogdefine
|
||||
|
||||
FPGA_XILINX:
|
||||
datatype: str
|
||||
description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters.
|
||||
default: 1
|
||||
paramtype: vlogdefine
|
||||
|
||||
targets:
|
||||
synth:
|
||||
default_tool: vivado
|
||||
|
@ -40,6 +46,7 @@ targets:
|
|||
toplevel: top_artya7
|
||||
parameters:
|
||||
- SRAM_INIT_FILE
|
||||
- FPGA_XILINX
|
||||
tools:
|
||||
vivado:
|
||||
part: "xc7a100tcsg324-1" # Default to Arty A7-100
|
||||
|
|
|
@ -15,6 +15,7 @@ filesets:
|
|||
- rtl/ibex_compressed_decoder.sv
|
||||
- rtl/ibex_controller.sv
|
||||
- rtl/ibex_cs_registers.sv
|
||||
- rtl/ibex_counters.sv
|
||||
- rtl/ibex_decoder.sv
|
||||
- rtl/ibex_ex_block.sv
|
||||
- rtl/ibex_fetch_fifo.sv
|
||||
|
|
|
@ -58,6 +58,52 @@ lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'wdata_i'[31:
|
|||
// entire 32-bit address around to make the code a bit cleaner.
|
||||
lint_off -rule UNUSED -file "*/rtl/timer.sv" -match "*'timer_addr_i'[31:10]*"
|
||||
|
||||
// Bits of signal are not used for MHPMCounterNum < 29: mhpmcounter_we[31:MHPMCounterNum+3]
|
||||
// cleaner to write all bits even if not all are used
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_we'[31:MHPMCounterNum+3]*"
|
||||
|
||||
// Bits of signal are not used: mhpmcounter_we[1]
|
||||
// Bits of signal are not used: mhpmcounterh_we[1]
|
||||
// Bits of signal are not used: mhpmcounter_incr[1]
|
||||
//
|
||||
// cleaner to write all bits even if not all are used
|
||||
//
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_we'[1]*"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounterh_we'[1]*"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_incr'[1]*"
|
||||
|
||||
// Signals are unused if MHPMCounterNum == 0: clk_i, rst_ni
|
||||
// Signal is unused if MHPMCounterNum == 0: counter_val_i[31:0]
|
||||
//
|
||||
// If no counters are implemented, no flops are elaborated. No clock, reset or
|
||||
// next-state logic is used.
|
||||
//
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'clk_i'"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'rst_ni'"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_val_i'"
|
||||
|
||||
// Bits of signal are not used for MHPMCounterNum < 29: counter_inc_i[28:MHPMCounterNum]
|
||||
// Bits of signal are not used for MHPMCounterNum < 29: counterh_we_i[28:MHPMCounterNum]
|
||||
// Bits of signal are not used for MHPMCounterNum < 29: counter_we_i[28:MHPMCounterNum]
|
||||
//
|
||||
// cleaner to write all bits even if not all are used
|
||||
//
|
||||
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_inc_i'[28:*]*"
|
||||
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counterh_we_i'[28:*]*"
|
||||
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_we_i'[28:*]*"
|
||||
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counter_inc_i*"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counterh_we_i*"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counter_we_i*"
|
||||
|
||||
// Bits of signal are not used for MHPMCounterWidth < 64: counter_upd[63:MHPMCounterWidth]
|
||||
// Bits of signal are not used for MHPMCounterWidth < 64: counter_load[63:MHPMCounterWidth]
|
||||
//
|
||||
// cleaner to write all bits even if not all are used
|
||||
//
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_upd'[63:*]*"
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_load'[63:*]*"
|
||||
|
||||
// Signal is not used: test_en_i
|
||||
// testability signal
|
||||
lint_off -rule UNUSED -file "*/rtl/ibex_register_file_ff.sv" -match "*test_en_i*"
|
||||
|
|
|
@ -2,6 +2,7 @@ ibex_pkg.sv
|
|||
ibex_alu.sv
|
||||
ibex_compressed_decoder.sv
|
||||
ibex_controller.sv
|
||||
ibex_counters.sv
|
||||
ibex_cs_registers.sv
|
||||
ibex_decoder.sv
|
||||
ibex_ex_block.sv
|
||||
|
|
86
rtl/ibex_counters.sv
Normal file
86
rtl/ibex_counters.sv
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Bank of performance counters.
//
// The module always exposes MaxNumCounters 64-bit read values. Only the first NumCounters of
// them are backed by flops; the remaining outputs are tied to zero. Each implemented counter
// stores CounterWidth bits and reads as zero above that width.
//
// Parameters:
//   MaxNumCounters - number of entries in the input vectors and in counter_val_o
//   NumCounters    - number of counters that are actually elaborated (indices 0..NumCounters-1)
//   CounterWidth   - number of flops per implemented counter
//
// Ports (all per-counter vectors are indexed by counter number):
//   counter_inc_i  - increment the counter by one (ignored in a cycle where it is written)
//   counterh_we_i  - write counter_val_i to bits [63:32], keep bits [31:0]
//   counter_we_i   - write counter_val_i to bits [31:0], keep bits [63:32]
//   counter_val_i  - 32-bit write data shared by all counters
//   counter_val_o  - current counter values, zero-extended to 64 bits
//
// If both write enables of a counter are set in the same cycle, the upper-half write
// (counterh_we_i) is the one that is applied.
//
// When FPGA_XILINX is defined, counters narrower than 49 bits are tagged with use_dsp = "yes"
// and the counter flop uses a synchronous reset instead of an asynchronous one.
module ibex_counters #(
    parameter int MaxNumCounters = 29,
    parameter int NumCounters    = 0,
    parameter int CounterWidth   = 32
) (
    input  logic                      clk_i,
    input  logic                      rst_ni,

    input  logic [MaxNumCounters-1:0] counter_inc_i,
    input  logic [MaxNumCounters-1:0] counterh_we_i,
    input  logic [MaxNumCounters-1:0] counter_we_i,
    input  logic [31:0]               counter_val_i,
    output logic [63:0]               counter_val_o [MaxNumCounters]
);

  // Zero-extended view of every counter slot; unimplemented slots are constant zero.
  logic [63:0] counter [MaxNumCounters];

  assign counter_val_o = counter;

  for (genvar i = 0; i < MaxNumCounters; i++) begin : g_counter
    // Only elaborate flops that are needed from the given CounterWidth and NumCounters.
    if (i < NumCounters) begin : g_counter_exists

      logic [63:0]             counter_upd;
      logic [63:0]             counter_load;
      logic                    we;
      logic [CounterWidth-1:0] counter_d;

      // Update
      always_comb begin

        // Write: by default the lower word is replaced and the upper word is kept. An
        // upper-half write swaps these roles.
        we = counter_we_i[i] | counterh_we_i[i];
        counter_load[63:32] = counter[i][63:32];
        counter_load[31:0]  = counter_val_i;
        if (counterh_we_i[i]) begin
          counter_load[63:32] = counter_val_i;
          counter_load[31:0]  = counter[i][31:0];
        end

        // Increment
        counter_upd = counter[i] + 64'h1;

        // Next value logic: a write takes priority over an increment. Only the implemented
        // CounterWidth bits are kept.
        if (we) begin
          counter_d = counter_load[CounterWidth-1:0];
        end else if (counter_inc_i[i]) begin
          counter_d = counter_upd[CounterWidth-1:0];
        end else begin
          counter_d = counter[i][CounterWidth-1:0];
        end
      end

`ifdef FPGA_XILINX
      // Set DSP pragma for supported xilinx FPGAs
      localparam dsp_pragma = CounterWidth < 49 ? "yes" : "no";
      (* use_dsp = dsp_pragma *) logic [CounterWidth-1:0] counter_q;
`else
      logic [CounterWidth-1:0] counter_q;
`endif

      // Counter flop. always_ff lets tools check that this process infers only flops.
`ifdef FPGA_XILINX
      // DSP output register requires synchronous reset.
      always_ff @(posedge clk_i) begin
`else
      always_ff @(posedge clk_i or negedge rst_ni) begin
`endif
        if (!rst_ni) begin
          counter_q <= '0;
        end else begin
          counter_q <= counter_d;
        end
      end

      // Zero-extend the stored value to the 64-bit read port.
      if (CounterWidth < 64) begin : g_counter_narrow
        assign counter[i][CounterWidth-1:0] = counter_q;
        assign counter[i][63:CounterWidth]  = '0;
      end else begin : g_counter_full
        assign counter[i] = counter_q;
      end
    end else begin : g_no_counter
      // Slot is not implemented: reads as zero, no flops are elaborated.
      assign counter[i] = '0;
    end
  end

endmodule
|
|
@ -183,7 +183,6 @@ module ibex_cs_registers #(
|
|||
logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
|
||||
logic mcountinhibit_we;
|
||||
|
||||
logic [63:0] mhpmcounter_d [32];
|
||||
// mhpmcounter flops are elaborated below providing only the precise number that is required based
|
||||
// on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops
|
||||
// where they exist and is otherwise 0.
|
||||
|
@ -877,55 +876,51 @@ module ibex_cs_registers #(
|
|||
end
|
||||
end
|
||||
|
||||
// update
|
||||
always_comb begin : mhpmcounter_update
|
||||
mhpmcounter_d = mhpmcounter;
|
||||
// mcycle and minstret
|
||||
ibex_counters #(
|
||||
.MaxNumCounters(1),
|
||||
.NumCounters(1),
|
||||
.CounterWidth(64)
|
||||
) mcycle_counter_i (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.counter_inc_i(mhpmcounter_incr[0] & ~mcountinhibit[0]),
|
||||
.counterh_we_i(mhpmcounterh_we[0]),
|
||||
.counter_we_i(mhpmcounter_we[0]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[0:0])
|
||||
);
|
||||
|
||||
for (int i=0; i<32; i++) begin : gen_mhpmcounter_update
|
||||
ibex_counters #(
|
||||
.MaxNumCounters(1),
|
||||
.NumCounters(1),
|
||||
.CounterWidth(64)
|
||||
) minstret_counter_i (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.counter_inc_i(mhpmcounter_incr[2] & ~mcountinhibit[2]),
|
||||
.counterh_we_i(mhpmcounterh_we[2]),
|
||||
.counter_we_i(mhpmcounter_we[2]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[2:2])
|
||||
);
|
||||
|
||||
// increment
|
||||
if (mhpmcounter_incr[i] & ~mcountinhibit[i]) begin
|
||||
mhpmcounter_d[i] = mhpmcounter[i] + 64'h1;
|
||||
end
|
||||
// reserved:
|
||||
assign mhpmcounter[1] = '0;
|
||||
|
||||
// write
|
||||
if (mhpmcounter_we[i]) begin
|
||||
mhpmcounter_d[i][31: 0] = csr_wdata_int;
|
||||
end else if (mhpmcounterh_we[i]) begin
|
||||
mhpmcounter_d[i][63:32] = csr_wdata_int;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Performance monitor registers
|
||||
// Only elaborate flops that are needed from the given MHPMCounterWidth and MHPMCounterNum
|
||||
// parameters
|
||||
for (genvar i = 0; i < 32; i++) begin : g_mhpmcounter
|
||||
// First 3 counters (cycle, time, instret) must always be elaborated
|
||||
if (i < 3 + MHPMCounterNum) begin : g_mhpmcounter_exists
|
||||
// First 3 counters must be 64-bit the rest have parameterisable width
|
||||
localparam int unsigned IMHPMCounterWidth = i < 3 ? 64 : MHPMCounterWidth;
|
||||
|
||||
logic [IMHPMCounterWidth-1:0] mhpmcounter_q;
|
||||
|
||||
always @(posedge clk_i or negedge rst_ni) begin
|
||||
if(~rst_ni) begin
|
||||
mhpmcounter_q <= '0;
|
||||
end else begin
|
||||
mhpmcounter_q <= mhpmcounter_d[i][IMHPMCounterWidth-1:0];
|
||||
end
|
||||
end
|
||||
|
||||
if (IMHPMCounterWidth < 64) begin : g_mhpmcounter_narrow
|
||||
assign mhpmcounter[i][IMHPMCounterWidth-1:0] = mhpmcounter_q;
|
||||
assign mhpmcounter[i][63:IMHPMCounterWidth] = '0;
|
||||
end else begin : g_mhpmcounter_full
|
||||
assign mhpmcounter[i] = mhpmcounter_q;
|
||||
end
|
||||
end else begin : g_no_mhpmcounter
|
||||
assign mhpmcounter[i] = '0;
|
||||
end
|
||||
end
|
||||
ibex_counters #(
|
||||
.MaxNumCounters(29),
|
||||
.NumCounters(MHPMCounterNum),
|
||||
.CounterWidth(MHPMCounterWidth)
|
||||
) mcounters_variable_i (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.counter_inc_i(mhpmcounter_incr[31:3] & ~mcountinhibit[31:3]),
|
||||
.counterh_we_i(mhpmcounterh_we[31:3]),
|
||||
.counter_we_i(mhpmcounter_we[31:3]),
|
||||
.counter_val_i(csr_wdata_int),
|
||||
.counter_val_o(mhpmcounter[3:31])
|
||||
);
|
||||
|
||||
if(MHPMCounterNum < 29) begin : g_mcountinhibit_reduced
|
||||
assign mcountinhibit = {{29-MHPMCounterNum{1'b1}}, mcountinhibit_q};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue