diff --git a/README.md b/README.md index 7cdbbdb9..68db45c9 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,13 @@ The options include different choices for the architecture of the multiplier uni The table below indicates performance, area and verification status for a few selected configurations. These are configurations on which lowRISC is focusing for performance evaluation and design verification (see [supported configs](ibex_configs.yaml)). -| Config | "small" | "maxperf" | "maxperf-pmp-bmfull" | +| Config | "micro" | "small" | "maxperf" | "maxperf-pmp-bmfull" | -| ------ | ------- | --------- | ---------------- | +| ------ | ------- | ------- | --------- | ---------------- | -| Features | RV32IMC, 3 cycle mult | RV32IMC, 1 cycle mult, Branch target ALU, Writeback stage | RV32IMCB, 1 cycle mult, Branch target ALU, Writeback stage, 16 PMP regions | -| Performance (CoreMark/MHz) | 2.47 | 3.13 | 3.05 | -| Area - Yosys (kGE) | 33.15 | 39.03 | 63.32 | -| Area - Commercial (estimated kGE) | ~27 | ~31 | ~50 | -| Verification status | Green | Amber | Amber | +| Features | RV32EC | RV32IMC, 3 cycle mult | RV32IMC, 1 cycle mult, Branch target ALU, Writeback stage | RV32IMCB, 1 cycle mult, Branch target ALU, Writeback stage, 16 PMP regions | +| Performance (CoreMark/MHz) | 0.904 | 2.47 | 3.13 | 3.05 | +| Area - Yosys (kGE) | 17.44 | 26.06 | 35.64 | 58.74 | +| Area - Commercial (estimated kGE) | ~16 | ~24 | ~33 | ~54 | +| Verification status | Red | Green | Amber | Amber | Notes: @@ -34,12 +34,13 @@ Notes: Note that different ISAs (use of B and C extensions) give the best results for different configurations. See the [Benchmarks README](examples/sw/benchmarks/README.md) for more information. The "maxperf-pmp-bmfull" configuration sets a `SpecBranch` parameter in `ibex_core.sv`; this helps timing but has a small negative performance impact. -* Yosys synthesis area numbers are based on the Ibex basic synthesis [flow](syn/README.md). 
+* Yosys synthesis area numbers are based on the Ibex basic synthesis [flow](syn/README.md) using the latch-based register file. * Commercial synthesis area numbers are a rough estimate of what might be achievable with a commercial synthesis flow and technology library. +* For comparison, the original "Zero-riscy" core yields an area of 23.14 kGE using our Yosys synthesis flow. * Verification status is a rough guide to the overall maturity of a particular configuration. Green indicates that verification is close to complete. Amber indicates that some verification has been performed, but the configuration is still experimental. - Red indicates a new configuration with minimal/no verification. + Red indicates a configuration with minimal/no verification. Users must make their own assessment of verification readiness for any tapeout. ## Documentation diff --git a/doc/introduction.rst b/doc/introduction.rst index 27e8cb22..ddabe51b 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -73,7 +73,7 @@ ASIC Synthesis ASIC synthesis is supported for Ibex. The whole design is completely synchronous and uses positive-edge triggered flip-flops, except for the register file, which can be implemented either with latches or with flip-flops. See :ref:`register-file` for more details. -The core occupies an area of roughly 18.9 kGE when using the latch-based register file and implementing the RV32IMC ISA, or 11.6 kGE when implementing the RV32EC ISA. +The core occupies an area of roughly 24 kGE when using the latch-based register file and implementing the RV32IMC ISA, or 16 kGE when implementing the RV32EC ISA. 
FPGA Synthesis -------------- diff --git a/syn/README.md b/syn/README.md index d7150d83..ded311eb 100644 --- a/syn/README.md +++ b/syn/README.md @@ -98,10 +98,9 @@ Timing reports are produced for the following path groups * Overall - Every path in the design, WNS (worst negative slack) from this report is the design WNS that limits the frequency * reg2reg - Paths from register to register -* in2x - Paths from an input to any end point, one report is produced per named - input (e.g. `instr_data_i` has its own report) -* x2out - Paths from any start point to an output, one report is produced per - named output (e.g. `data_wdata_o` has its own report) +* in2reg - Paths from any input to any register +* reg2out - Paths from any register to any output +* in2out - Paths from any input to any output They are available in two formats .rpt and .csv.rpt. The .rpt is the full output from OpenSTA and gives the full path between the start and end points. The CSV diff --git a/syn/rtl/latch_map.v b/syn/rtl/latch_map.v new file mode 100644 index 00000000..fb1f34d6 --- /dev/null +++ b/syn/rtl/latch_map.v @@ -0,0 +1,12 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Map latch primitives to a specific cell +module $_DLATCH_P_ (input E, input D, output Q); +DLH_X1 _TECHMAP_REPLACE_ ( +.G(E), +.D(D), +.Q(Q) +); +endmodule diff --git a/syn/rtl/prim_clock_gating.v b/syn/rtl/prim_clock_gating.v index ff591723..14d19745 100644 --- a/syn/rtl/prim_clock_gating.v +++ b/syn/rtl/prim_clock_gating.v @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0, see LICENSE for details. 
// SPDX-License-Identifier: Apache-2.0 -// Dummy clock gating module without the clock gate for yosys synthesis +// Example clock gating module for yosys synthesis module prim_clock_gating ( input clk_i, @@ -11,6 +11,13 @@ module prim_clock_gating ( output clk_o ); - assign clk_o = clk_i; + reg en_latch; + + always @* begin + if (!clk_i) begin + en_latch = en_i | test_en_i; + end + end + assign clk_o = en_latch & clk_i; endmodule diff --git a/syn/syn_yosys.sh b/syn/syn_yosys.sh index f7308022..071415ea 100755 --- a/syn/syn_yosys.sh +++ b/syn/syn_yosys.sh @@ -42,9 +42,9 @@ rm -f $LR_SYNTH_OUT_DIR/generated/*_pkg.v # remove tracer (not needed for synthesis) rm -f $LR_SYNTH_OUT_DIR/generated/ibex_tracer.v -# remove the FPGA & latch-based register file (because we will use the -# flop-based one instead) -rm -f $LR_SYNTH_OUT_DIR/generated/ibex_register_file_latch.v +# remove the FPGA & flop-based register file (because we will use the +# latch-based one instead) +rm -f $LR_SYNTH_OUT_DIR/generated/ibex_register_file_ff.v rm -f $LR_SYNTH_OUT_DIR/generated/ibex_register_file_fpga.v yosys -c ./tcl/yosys_run_synth.tcl | tee ./$LR_SYNTH_OUT_DIR/log/syn.log diff --git a/syn/tcl/lr_synth_flow_var_setup.tcl b/syn/tcl/lr_synth_flow_var_setup.tcl index f1ef0e7b..cda1cac6 100644 --- a/syn/tcl/lr_synth_flow_var_setup.tcl +++ b/syn/tcl/lr_synth_flow_var_setup.tcl @@ -15,7 +15,7 @@ set_flow_bool_var flatten 1 "flatten" set_flow_bool_var timing_run 0 "timing run" set_flow_bool_var ibex_branch_target_alu 0 "Enable branch target ALU in Ibex" set_flow_bool_var ibex_writeback_stage 0 "Enable writeback stage in Ibex" -set_flow_bool_var ibex_bitmanip 0 "Enable bitmanip extenion for Ibex" +set_flow_var ibex_bitmanip 0 "Bitmanip extenion setting for Ibex (0,1,2 - enums not supported)" set_flow_var ibex_multiplier "fast" "Multiplier implementation for Ibex (slow/fast/single-cycle)" source $lr_synth_config_file @@ -25,18 +25,6 @@ if { $lr_synth_timing_run } { #set_flow_var sdc_file 
"${top_module}.sdc" "SDC file" set_flow_var sdc_file_in "${lr_synth_top_module}.${lr_synth_cell_library_name}.sdc" "Input SDC file" set_flow_var abc_sdc_file_in "${lr_synth_top_module}_abc.${lr_synth_cell_library_name}.sdc" "Input SDC file for ABC" - set flop_in_pin_default "*/D" - set flop_out_pin_default "*/Q" - - # STA needs to know start and end points for identifying reg2reg paths. These - # can vary depending upon the library used - if { [string first "nangate" $lr_synth_cell_library_name] == 0 } { - set flop_in_pin_default "*/D" - set flop_out_pin_default "*/CK" - } - - set_flow_var flop_in_pin $flop_in_pin_default "In pin to flop for reg2reg path extraction" - set_flow_var flop_out_pin $flop_out_pin_default "Out pin from flop for reg2reg path extraction" set sdc_file_out_default [string range $lr_synth_sdc_file_in 0 [expr [string last ".sdc" $lr_synth_sdc_file_in] - 1]] set sdc_file_out_default "./${lr_synth_out_dir}/generated/$sdc_file_out_default.out.sdc" @@ -45,7 +33,7 @@ if { $lr_synth_timing_run } { set sta_netlist_out_default [string range $lr_synth_netlist_out 0 [expr [string last ".v" $lr_synth_netlist_out] - 1]] set sta_netlist_out_default "$sta_netlist_out_default.sta.v" set_flow_var sta_netlist_out $sta_netlist_out_default "STA netlist out" - set_flow_var sta_paths_per_group 100 "STA paths reported per group" + set_flow_var sta_paths_per_group 1000 "STA paths reported per group" set_flow_var sta_overall_paths 1000 "STA paths reported in overall report" puts "clock period: $lr_synth_clk_period ps" diff --git a/syn/tcl/sta_utils.tcl b/syn/tcl/sta_utils.tcl index 38bec700..b7e5bd10 100644 --- a/syn/tcl/sta_utils.tcl +++ b/syn/tcl/sta_utils.tcl @@ -5,37 +5,28 @@ proc setup_path_groups {input_list output_list path_group_list_name} { upvar $path_group_list_name path_group_list + set flops_in [all_registers -edge_triggered -data_pins] + set flops_out [all_registers -edge_triggered -clock_pins] + group_path -name reg2reg -from $flops_out -to $flops_in + 
lappend path_group_list reg2reg + foreach output $output_list { set output_name [lindex $output 0] - set output_ports [get_ports $output_name] - set path_group_name "x2out_${output_name}" - group_path -name $path_group_name -to $output_ports - lappend path_group_list $path_group_name + lappend outputs_list [get_ports $output_name] } + group_path -name reg2out -from $flops_out -to $outputs_list + lappend path_group_list reg2out foreach input $input_list { set input_name [lindex $input 0] - set input_ports [get_ports $input_name] - set path_group_name "in2x_${input_name}" - group_path -name $path_group_name -from $input_ports - lappend path_group_list $path_group_name + lappend inputs_list [get_ports $input_name] } + group_path -name in2reg -from $inputs_list -to $flops_in + lappend path_group_list in2reg - global lr_synth_flop_in_pin - global lr_synth_flop_out_pin + group_path -name in2out -from $inputs_list -to $outputs_list + lappend path_group_list in2out - set flops_in [get_pins $lr_synth_flop_in_pin] - set flops_out [get_pins $lr_synth_flop_out_pin] - - group_path -name "reg2reg" -to $flops_in -from $flops_out - lappend path_group_list "reg2reg" -} - -proc setup_i2o_pathgroup {input_name output_name group_name} { - set output_ports [get_ports $output_name] - set input_ports [get_ports $input_name] - - group_path -name $group_name -to $output_ports -from $input_ports } proc timing_report {path_group rpt_out path_count} { diff --git a/syn/tcl/yosys_run_synth.tcl b/syn/tcl/yosys_run_synth.tcl index 7c63960c..7f5bb9b3 100644 --- a/syn/tcl/yosys_run_synth.tcl +++ b/syn/tcl/yosys_run_synth.tcl @@ -24,15 +24,16 @@ if { $lr_synth_ibex_writeback_stage } { yosys "chparam -set WritebackStage 1 ibex_core" } -if { $lr_synth_ibex_bitmanip } { - yosys "chparam -set RV32B 1 ibex_core" -} +yosys "chparam -set RV32B $lr_synth_ibex_bitmanip ibex_core" yosys "chparam -set MultiplierImplementation \"$lr_synth_ibex_multiplier\" ibex_core" yosys "synth $flatten_opt -top 
$lr_synth_top_module" yosys "opt -purge" +# Map latch primitives onto latch cells +yosys "techmap -map rtl/latch_map.v" + yosys "write_verilog $lr_synth_pre_map_out" yosys "dfflibmap -liberty $lr_synth_cell_library_path"