Merge pull request #624 from ganoam/fpga-opt-perf-mon-pr

Modify Performance Counter for DSP Inference
This commit is contained in:
Noam Gallmann 2020-03-06 12:49:51 +01:00 committed by GitHub
parent 3d827e1db1
commit 11a5fc24d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 198 additions and 47 deletions

View file

@ -122,3 +122,17 @@ The remaining event selector CSRs are tied to 0, i.e., no events are counted by
+----------------------+-------------+-------------+--------------+
| ``mhpmevent10(h)`` | 0x32A | 0x0000_0400 | 10 |
+----------------------+-------------+-------------+--------------+
FPGA Targets
------------
For FPGA targets the performance counters constitute a particularly large structure.
Implementing the maximum of 29 event counters with widths of 32, 48 and 64 bits results in relative logic utilizations of the core of 100%, 111% and 129%, respectively.
The relative numbers of flip-flops are 100%, 125% and 150%.
It is recommended to implement event counters of 32 bit width where possible.
For Xilinx FPGA devices featuring the `DSP48E1` DSP slice or similar, counter logic can be absorbed into the DSP slice for widths up to 48 bits.
The resulting relative logic utilizations with respect to the non-DSP 32 bit counter implementation are 83% and 89% respectively for 32 and 48 bit DSP counters.
This comes at the expense of 1 DSP slice per counter.
For 32 bit counters only, the corresponding flip-flops can be incorporated into the DSP's output pipeline register, resulting in a reduction of the number of flip-flops to 50%.
In order to infer DSP slices for performance counters, define the preprocessor variable ``FPGA_XILINX``.

View file

@ -21,6 +21,7 @@ ${PRJ_DIR}/ibex/rtl/ibex_alu.sv
${PRJ_DIR}/ibex/rtl/ibex_compressed_decoder.sv
${PRJ_DIR}/ibex/rtl/ibex_controller.sv
${PRJ_DIR}/ibex/rtl/ibex_cs_registers.sv
${PRJ_DIR}/ibex/rtl/ibex_counters.sv
${PRJ_DIR}/ibex/rtl/ibex_decoder.sv
${PRJ_DIR}/ibex/rtl/ibex_ex_block.sv
${PRJ_DIR}/ibex/rtl/ibex_id_stage.sv

View file

@ -31,6 +31,12 @@ parameters:
default: "../../../../../examples/sw/led/led.vmem"
paramtype: vlogdefine
FPGA_XILINX:
datatype: str
description: Identifies Xilinx FPGA targets to set DSP pragmas for performance counters.
default: 1
paramtype: vlogdefine
targets:
synth:
default_tool: vivado
@ -40,6 +46,7 @@ targets:
toplevel: top_artya7
parameters:
- SRAM_INIT_FILE
- FPGA_XILINX
tools:
vivado:
part: "xc7a100tcsg324-1" # Default to Arty A7-100

View file

@ -15,6 +15,7 @@ filesets:
- rtl/ibex_compressed_decoder.sv
- rtl/ibex_controller.sv
- rtl/ibex_cs_registers.sv
- rtl/ibex_counters.sv
- rtl/ibex_decoder.sv
- rtl/ibex_ex_block.sv
- rtl/ibex_fetch_fifo.sv

View file

@ -58,6 +58,52 @@ lint_off -rule UNUSED -file "*/rtl/sim/simulator_ctrl.sv" -match "*'wdata_i'[31:
// entire 32-bit address around to make the code a bit cleaner.
lint_off -rule UNUSED -file "*/rtl/timer.sv" -match "*'timer_addr_i'[31:10]*"
// Bits of signal are not used for MHPMCounterNum < 29: mhpmcounter_we[31:MHPMCounterNum+3]
// cleaner to write all bits even if not all are used
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_we'[31:MHPMCounterNum+3]*"
// Bits of signal are not used: mhpmcounter_we[1]
// Bits of signal are not used: mhpmcounterh_we[1]
// Bits of signal are not used: mhpmcounter_incr[1]
//
// cleaner to write all bits even if not all are used
//
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_we'[1]*"
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounterh_we'[1]*"
lint_off -rule UNUSED -file "*/rtl/ibex_cs_registers.sv" -match "*'mhpmcounter_incr'[1]*"
// Signals are unused if MHPMCounterNum == 0: clk_i, rst_ni
// Signal is unused if MHPMCounterNum == 0: counter_val_i[31:0]
//
// If no counters are implemented, no flops are elaborated. No clock, reset or
// next-state logic is used.
//
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'clk_i'"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'rst_ni'"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_val_i'"
// Bits of signal are not used for MHPMCounterNum < 29: counter_inc_i[28:MHPMCounterNum]
// Bits of signal are not used for MHPMCounterNum < 29: counterh_we_i[28:MHPMCounterNum]
// Bits of signal are not used for MHPMCounterNum < 29: counter_we_i[28:MHPMCounterNum]
//
// cleaner to write all bits even if not all are used
//
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_inc_i'[28:*]*"
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counterh_we_i'[28:*]*"
// lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_we_i'[28:*]*"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counter_inc_i*"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counterh_we_i*"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*counter_we_i*"
// Bits of signal are not used for MHPMCounterWidth < 64: counter_upd[63:MHPMCounterWidth]
// Bits of signal are not used for MHPMCounterWidth < 64: counter_load[63:MHPMCounterWidth]
//
// cleaner to write all bits even if not all are used
//
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_upd'[63:*]*"
lint_off -rule UNUSED -file "*/rtl/ibex_counters.sv" -match "*'counter_load'[63:*]*"
// Signal is not used: test_en_i
// testability signal
lint_off -rule UNUSED -file "*/rtl/ibex_register_file_ff.sv" -match "*test_en_i*"

View file

@ -2,6 +2,7 @@ ibex_pkg.sv
ibex_alu.sv
ibex_compressed_decoder.sv
ibex_controller.sv
ibex_counters.sv
ibex_cs_registers.sv
ibex_decoder.sv
ibex_ex_block.sv

86
rtl/ibex_counters.sv Normal file
View file

@ -0,0 +1,86 @@
/**
 * Bank of up-counters with CSR-style 32-bit write access.
 *
 * The module exposes MaxNumCounters 64-bit counter values. Only the first
 * NumCounters entries are backed by flops, each CounterWidth bits wide; bits
 * [63:CounterWidth] of an implemented counter and all bits of an unimplemented
 * counter read as zero.
 *
 * Per-counter behaviour, in priority order:
 *   1. counterh_we_i[i]: counter_val_i replaces bits [63:32], bits [31:0] are kept.
 *   2. counter_we_i[i]:  counter_val_i replaces bits [31:0], bits [63:32] are kept.
 *   3. counter_inc_i[i]: the counter is incremented by one.
 *   4. otherwise the counter holds its value.
 * In all cases only the low CounterWidth bits of the result are stored.
 *
 * When the preprocessor symbol FPGA_XILINX is defined, the counter register
 * carries a use_dsp attribute ("yes" for CounterWidth < 49, "no" otherwise)
 * and is reset synchronously instead of asynchronously.
 *
 * Parameters:
 *   MaxNumCounters - number of entries in the port vectors / output array.
 *   NumCounters    - number of entries that are actually implemented.
 *   CounterWidth   - number of flops per implemented counter.
 */
module ibex_counters #(
  parameter int MaxNumCounters = 29,
  parameter int NumCounters    = 0,
  parameter int CounterWidth   = 32
) (
  input  logic                      clk_i,
  input  logic                      rst_ni,         // active-low reset

  input  logic [MaxNumCounters-1:0] counter_inc_i,  // per-counter increment request
  input  logic [MaxNumCounters-1:0] counterh_we_i,  // per-counter write of bits [63:32]
  input  logic [MaxNumCounters-1:0] counter_we_i,   // per-counter write of bits [31:0]
  input  logic [31:0]               counter_val_i,  // write data shared by all counters
  output logic [63:0]               counter_val_o [MaxNumCounters]
);

  // Zero-extended view of every counter; drives the output directly.
  logic [63:0] counter [MaxNumCounters];

  assign counter_val_o = counter;

  for (genvar i = 0; i < MaxNumCounters; i++) begin : g_counter
    // Only elaborate flops that are needed from the given CounterWidth and NumCounters.
    if (i < NumCounters) begin : g_counter_exists

      logic [63:0]             counter_upd;   // current value + 1
      logic [63:0]             counter_load;  // current value with one 32-bit half replaced
      logic                    we;            // any write to this counter
      logic [CounterWidth-1:0] counter_d;     // next state

      // Next-state computation
      always_comb begin
        // Write: default to a low-half write, override for a high-half write so
        // that counterh_we_i wins should both enables ever be set together.
        we = counter_we_i[i] | counterh_we_i[i];
        counter_load[63:32] = counter[i][63:32];
        counter_load[31:0]  = counter_val_i;
        if (counterh_we_i[i]) begin
          counter_load[63:32] = counter_val_i;
          counter_load[31:0]  = counter[i][31:0];
        end

        // Increment (computed on the zero-extended 64-bit value)
        counter_upd = counter[i] + 64'h1;

        // Writes take priority over increments; bits above CounterWidth are dropped.
        if (we) begin
          counter_d = counter_load[CounterWidth-1:0];
        end else if (counter_inc_i[i]) begin
          counter_d = counter_upd[CounterWidth-1:0];
        end else begin
          counter_d = counter[i][CounterWidth-1:0];
        end
      end

      // Counter flop. Each preprocessor branch holds a complete declaration and
      // a complete process, so the code is well-formed whichever branch is taken.
`ifdef FPGA_XILINX
      // Set DSP pragma for supported xilinx FPGAs
      localparam dsp_pragma = CounterWidth < 49 ? "yes" : "no";
      (* use_dsp = dsp_pragma *) logic [CounterWidth-1:0] counter_q;

      // DSP output register requires synchronous reset.
      always_ff @(posedge clk_i) begin
        if (!rst_ni) begin
          counter_q <= '0;
        end else begin
          counter_q <= counter_d;
        end
      end
`else
      logic [CounterWidth-1:0] counter_q;

      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          counter_q <= '0;
        end else begin
          counter_q <= counter_d;
        end
      end
`endif

      // Zero-extend the stored value to the 64-bit output width.
      if (CounterWidth < 64) begin : g_counter_narrow
        assign counter[i][CounterWidth-1:0] = counter_q;
        assign counter[i][63:CounterWidth]  = '0;
      end else begin : g_counter_full
        assign counter[i] = counter_q;
      end

    end else begin : g_no_counter
      // Counter not implemented: reads as zero, inputs for this index are ignored.
      assign counter[i] = '0;
    end
  end

endmodule

View file

@ -183,7 +183,6 @@ module ibex_cs_registers #(
logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q;
logic mcountinhibit_we;
logic [63:0] mhpmcounter_d [32];
// mhpmcounter flops are elaborated below providing only the precise number that is required based
// on MHPMCounterNum/MHPMCounterWidth. This signal connects to the Q output of these flops
// where they exist and is otherwise 0.
@ -877,55 +876,51 @@ module ibex_cs_registers #(
end
end
// update
always_comb begin : mhpmcounter_update
mhpmcounter_d = mhpmcounter;
// mcycle and minstret
ibex_counters #(
.MaxNumCounters(1),
.NumCounters(1),
.CounterWidth(64)
) mcycle_counter_i (
.clk_i(clk_i),
.rst_ni(rst_ni),
.counter_inc_i(mhpmcounter_incr[0] & ~mcountinhibit[0]),
.counterh_we_i(mhpmcounterh_we[0]),
.counter_we_i(mhpmcounter_we[0]),
.counter_val_i(csr_wdata_int),
.counter_val_o(mhpmcounter[0:0])
);
for (int i=0; i<32; i++) begin : gen_mhpmcounter_update
ibex_counters #(
.MaxNumCounters(1),
.NumCounters(1),
.CounterWidth(64)
) minstret_counter_i (
.clk_i(clk_i),
.rst_ni(rst_ni),
.counter_inc_i(mhpmcounter_incr[2] & ~mcountinhibit[2]),
.counterh_we_i(mhpmcounterh_we[2]),
.counter_we_i(mhpmcounter_we[2]),
.counter_val_i(csr_wdata_int),
.counter_val_o(mhpmcounter[2:2])
);
// increment
if (mhpmcounter_incr[i] & ~mcountinhibit[i]) begin
mhpmcounter_d[i] = mhpmcounter[i] + 64'h1;
end
// reserved:
assign mhpmcounter[1] = '0;
// write
if (mhpmcounter_we[i]) begin
mhpmcounter_d[i][31: 0] = csr_wdata_int;
end else if (mhpmcounterh_we[i]) begin
mhpmcounter_d[i][63:32] = csr_wdata_int;
end
end
end
// Performance monitor registers
// Only elaborate flops that are needed from the given MHPMCounterWidth and MHPMCounterNum
// parameters
for (genvar i = 0; i < 32; i++) begin : g_mhpmcounter
// First 3 counters (cycle, time, instret) must always be elaborated
if (i < 3 + MHPMCounterNum) begin : g_mhpmcounter_exists
// First 3 counters must be 64-bit the rest have parameterisable width
localparam int unsigned IMHPMCounterWidth = i < 3 ? 64 : MHPMCounterWidth;
logic [IMHPMCounterWidth-1:0] mhpmcounter_q;
always @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
mhpmcounter_q <= '0;
end else begin
mhpmcounter_q <= mhpmcounter_d[i][IMHPMCounterWidth-1:0];
end
end
if (IMHPMCounterWidth < 64) begin : g_mhpmcounter_narrow
assign mhpmcounter[i][IMHPMCounterWidth-1:0] = mhpmcounter_q;
assign mhpmcounter[i][63:IMHPMCounterWidth] = '0;
end else begin : g_mhpmcounter_full
assign mhpmcounter[i] = mhpmcounter_q;
end
end else begin : g_no_mhpmcounter
assign mhpmcounter[i] = '0;
end
end
ibex_counters #(
.MaxNumCounters(29),
.NumCounters(MHPMCounterNum),
.CounterWidth(MHPMCounterWidth)
) mcounters_variable_i (
.clk_i(clk_i),
.rst_ni(rst_ni),
.counter_inc_i(mhpmcounter_incr[31:3] & ~mcountinhibit[31:3]),
.counterh_we_i(mhpmcounterh_we[31:3]),
.counter_we_i(mhpmcounter_we[31:3]),
.counter_val_i(csr_wdata_int),
.counter_val_o(mhpmcounter[3:31])
);
if(MHPMCounterNum < 29) begin : g_mcountinhibit_reduced
assign mcountinhibit = {{29-MHPMCounterNum{1'b1}}, mcountinhibit_q};