tech_cells_generic: Upgrade to v0.2.13 (#1676)

2025-06-27 17:00:57 -04:00 · 2023-12-06 11:16:58 +01:00 · 2023-12-06 11:16:58 +01:00 · 29eae1ca22
commit 29eae1ca22
parent 2cfa4e5a10
9 changed files with 103 additions and 34 deletions
--- a/Bender.yml
+++ b/Bender.yml
@ -12,10 +12,7 @@ dependencies:
    { git: "https://github.com/pulp-platform/common_cells", version: 1.23.0 }
  fpnew: { git: "https://github.com/openhwgroup/cvfpu.git", version: 0.7.0 }
  tech_cells_generic:
-    {
-      git: "https://github.com/pulp-platform/tech_cells_generic.git",
-      rev: b2a68114302af1d8191ddf34ea0e07b471911866,
-    }
+    { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 }

 frozen: true

--- a/vendor/pulp-platform/tech_cells_generic/CHANGELOG.md
+++ b/vendor/pulp-platform/tech_cells_generic/CHANGELOG.md
@ -4,7 +4,28 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

-## Unreleased
+## 0.2.13 - 2023-09-19
+### Fixed
+- `tc_sram_xilinx`: Fix be assignment
+
+## 0.2.12 - 2023-08-12
+### Changed
+- `tc_sram_xilinx`: Support ByteWidth != 8
+
+## 0.2.11 - 2022-12-12
+### Added
+- `tc_clk_or2`: A new generic tech cell for balanced clock OR-gates.
+- `tc_clk_mux2`: Added warning about misusing `tc_clk_mux2` cells.
+
+## 0.2.10 - 2022-11-20
+### Added
+- `tc_sram_impl`: Wrapper for `tc_sram` with implementation-specific keys and IO
+
+### Changed
+- `tc_sram`: Improve simulation performance
+
+### Fixed
+- `tc_clk_xilinx`: Add `IS_FUNCTIONAL` parameter to match `tc_clk_gating` interface

 ## 0.2.9 - 2022-03-17
 ### Changed
@ -18,7 +39,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

 ## 0.2.6 - 2021-10-04
 ### Added
- Add `pad_functional_xilinx
+- Add `pad_functional_xilinx`

 ### Fixed
 - Bender targets
--- a/vendor/pulp-platform/tech_cells_generic/README.md
+++ b/vendor/pulp-platform/tech_cells_generic/README.md
@ -19,7 +19,7 @@ If you want to get started in your own technology (either an unsupported FPGA or

 Clock cells usually are care-fully designed cells which do not exhibit any glitches. Therefore they need to be manually instantiated in ASIC designs. All clock cells can be found in `tc_clk.sv`.

-|        Name       |         Description          | Status |       Xilinx       |
+| Name              | Description                  | Status | Xilinx             |
 |-------------------|------------------------------|--------|--------------------|
 | `tc_clk_and2`     | Clock and gate               | active | :white_check_mark: |
 | `tc_clk_buffer`   | Clock buffer                 | active | :white_check_mark: |
@ -27,6 +27,7 @@ Clock cells usually are care-fully designed cells which do not exhibit any glitc
 | `tc_clk_inverter` | Clock inverter               | active | :white_check_mark: |
 | `tc_clk_mux2`     | Clock Mux with two inputs    | active | :white_check_mark: |
 | `tc_clk_xor2`     | Clock Xor                    | active | :white_check_mark: |
+| `tc_clk_or2`      | Clock Or                     | active | :white_check_mark: |
 | `tc_clk_delay`    | Programmable clock-delay     | active |                    |

 ### Memory
--- a/vendor/pulp-platform/tech_cells_generic/src/fpga/tc_clk_xilinx.sv
+++ b/vendor/pulp-platform/tech_cells_generic/src/fpga/tc_clk_xilinx.sv
@ -30,7 +30,14 @@ module tc_clk_buffer (
 endmodule

 // Disable clock gating on FPGA as it behaves differently than expected
-module tc_clk_gating (
+module tc_clk_gating #(
+  /// This parameter is a hint for tool/technology specific mappings of this
+  /// tech_cell. It indicates whether this particular clk gate instance is
+  /// required for functional correctness or just instantiated for power
+  /// savings. If IS_FUNCTIONAL == 0, technology specific mappings might
+  /// replace this cell with a feedthrough connection without any gating.
+  parameter bit IS_FUNCTIONAL = 1'b1
+)(
   input  logic clk_i,
   input  logic en_i,
   input  logic test_en_i,
@ -76,3 +83,14 @@ module tc_clk_xor2 (

 endmodule

+module tc_clk_or2 (
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk0_i | clk1_i;
+
+endmodule
+
+
--- a/vendor/pulp-platform/tech_cells_generic/src/fpga/tc_sram_xilinx.sv
+++ b/vendor/pulp-platform/tech_cells_generic/src/fpga/tc_sram_xilinx.sv
@ -24,6 +24,7 @@ module tc_sram #(
  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
  parameter              SimInit      = "zeros",  // Simulation initialization, fixed to zero here!
  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  parameter              ImplKey      = "none",   // Reference to specific implementation
  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
@ -43,38 +44,47 @@ module tc_sram #(
  output data_t [NumPorts-1:0] rdata_o     // read data
 );

-  localparam int unsigned DataWidthAligned = ByteWidth * BeWidth;
+
+  // XPM only supports a byte width of 8. Hence, map each input byte to a multiple of 8 bits
+  // Number of 8-bit bytes (memory bytes) per data byte
+  localparam int unsigned BytesPerByte     = (ByteWidth + 7) / 8;
+  // Number of allocated memory bits per data byte
+  localparam int unsigned ByteWidthAligned = BytesPerByte * 8;
+  // Resulting memory width and size
+  localparam int unsigned DataWidthAligned = ByteWidthAligned * BeWidth;
  localparam int unsigned Size             = NumWords * DataWidthAligned;

-  typedef logic [DataWidthAligned-1:0] data_aligned_t;
+  typedef logic [DataWidthAligned-1:0]     data_aligned_t;
+  typedef logic [BytesPerByte*BeWidth-1:0] be_aligned_t;

+  data_aligned_t [NumPorts-1:0] wdata_pad;
+  data_aligned_t [NumPorts-1:0] rdata_pad;
  data_aligned_t [NumPorts-1:0] wdata_al;
  data_aligned_t [NumPorts-1:0] rdata_al;
-  be_t           [NumPorts-1:0] we;
+  be_aligned_t   [NumPorts-1:0] be_al;
+  be_aligned_t   [NumPorts-1:0] we_al;

-  // pad with 0 to next byte for inferable macro below, as the macro wants
-  // READ_DATA_WIDTH_A be a multiple of BYTE_WRITE_WIDTH_A
-  always_comb begin : p_align
-    wdata_al = '0;
-    for (int unsigned i = 0; i < NumPorts; i++) begin
-      wdata_al[i][DataWidth-1:0] = wdata_i[i];
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_align
+    // Zero-pad data to allow bit select
+    assign wdata_pad[i] = data_aligned_t'(wdata_i[i]);
+    assign rdata_o[i]   = data_t'(rdata_pad[i]);
+    for (genvar j = 0; j < BeWidth; j++) begin
+        // Unpack data
+        assign wdata_al[i][j*ByteWidthAligned+:ByteWidthAligned] = ByteWidthAligned'(wdata_pad[i][j*ByteWidth+:ByteWidth]);
+        assign rdata_pad[i][j*ByteWidth+:ByteWidth]              = ByteWidth'(rdata_al[i][j*ByteWidthAligned+:ByteWidthAligned]);
+        // In case ByteWidth > 8, let each be_i drive the corresponding number of memory be
+        assign be_al[i][j*BytesPerByte+:BytesPerByte]            = {BytesPerByte{be_i[i][j]}};
+        assign we_al[i][j*BytesPerByte+:BytesPerByte]            = {BytesPerByte{be_i[i][j] & we_i[i]}};
    end
  end

-  for (genvar i = 0; i < NumPorts; i++) begin : gen_port_assign
-    for (genvar j = 0; j < BeWidth; j++) begin : gen_we_assign
-      assign we[i][j] = be_i[i][j] & we_i[i];
-    end
-    assign rdata_o[i] = data_t'(rdata_al[i]);
-  end
-
  if (NumPorts == 32'd1) begin : gen_1_ports
    // xpm_memory_spram: Single Port RAM
    // XilinxParameterizedMacro, version 2018.1
    xpm_memory_spram#(
      .ADDR_WIDTH_A        ( AddrWidth        ), // DECIMAL
      .AUTO_SLEEP_TIME     ( 0                ), // DECIMAL
-      .BYTE_WRITE_WIDTH_A  ( ByteWidth        ), // DECIMAL
+      .BYTE_WRITE_WIDTH_A  ( 8                ), // DECIMAL
      .ECC_MODE            ( "no_ecc"         ), // String
      .MEMORY_INIT_FILE    ( "none"           ), // String
      .MEMORY_INIT_PARAM   ( "0"              ), // String
@ -102,7 +112,7 @@ module tc_sram #(
      .regcea   ( 1'b1         ), // 1-bit input: Clock Enable for the last register
      .rsta     ( ~rst_ni      ), // 1-bit input: Reset signal for the final port A output
      .sleep    ( 1'b0         ), // 1-bit input: sleep signal to enable the dynamic power save
-      .wea      ( we[0]        )
+      .wea      ( we_al[0]     )
    );
  end else if (NumPorts == 32'd2) begin : gen_2_ports
    // xpm_memory_tdpram: True Dual Port RAM
@ -111,8 +121,8 @@ module tc_sram #(
      .ADDR_WIDTH_A            ( AddrWidth        ), // DECIMAL
      .ADDR_WIDTH_B            ( AddrWidth        ), // DECIMAL
      .AUTO_SLEEP_TIME         ( 0                ), // DECIMAL
-      .BYTE_WRITE_WIDTH_A      ( ByteWidth        ), // DECIMAL
-      .BYTE_WRITE_WIDTH_B      ( ByteWidth        ), // DECIMAL
+      .BYTE_WRITE_WIDTH_A      ( 8                ), // DECIMAL
+      .BYTE_WRITE_WIDTH_B      ( 8                ), // DECIMAL
      .CLOCKING_MODE           ( "common_clock"   ), // String
      .ECC_MODE                ( "no_ecc"         ), // String
      .MEMORY_INIT_FILE        ( "none"           ), // String
@ -158,8 +168,8 @@ module tc_sram #(
      .rsta     ( ~rst_ni      ), // 1-bit input: Reset signal for the final port A output
      .rstb     ( ~rst_ni      ), // 1-bit input: Reset signal for the final port B output
      .sleep    ( 1'b0         ), // 1-bit input: sleep signal to enable the dynamic power
-      .wea      ( we[0]        ), // WRITE_DATA_WIDTH_A-bit input: Write enable vector for port A
-      .web      ( we[1]        )  // WRITE_DATA_WIDTH_B-bit input: Write enable vector for port B
+      .wea      ( we_al[0]     ), // WRITE_DATA_WIDTH_A-bit input: Write enable vector for port A
+      .web      ( we_al[1]     )  // WRITE_DATA_WIDTH_B-bit input: Write enable vector for port B
    );
  end else begin : gen_err_ports
    $fatal(1, "Not supported port parametrization for NumPorts: %0d", NumPorts);
--- a/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
+++ b/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
@ -61,6 +61,16 @@ module tc_clk_inverter (

 endmodule

+// Warning: Typical clock mux cells of a technology's std cell library ARE NOT
+// GLITCH FREE!! The only difference to a regular multiplexer cell is that they
+// feature balanced rise- and fall-times. In other words: SWITCHING FROM ONE
+// CLOCK TO THE OTHER CAN INTRODUCE GLITCHES. ALSO, GLITCHES ON THE SELECT LINE
+// DIRECTLY TRANSLATE TO GLITCHES ON THE OUTPUT CLOCK!! This cell is only
+// intended to be used for quasi-static switching between clocks when one of the
+// clocks is anyway inactive or if the downstream logic remains gated or in
+// reset state during the transition phase. If you need dynamic switching
+// between arbitrary input clocks without introducing glitches, have a look at
+// the clk_mux_glitch_free cell in the pulp-platform/common_cells repository.
 module tc_clk_mux2 (
  input  logic clk0_i,
  input  logic clk1_i,
@ -82,6 +92,16 @@ module tc_clk_xor2 (

 endmodule

+module tc_clk_or2 (
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk0_i | clk1_i;
+
+endmodule
+
 `ifndef SYNTHESIS
 module tc_clk_delay #(
  parameter int unsigned Delay = 300ps
@ -98,5 +118,3 @@ module tc_clk_delay #(

 endmodule
 `endif
-
-
--- a/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
+++ b/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
@ -30,6 +30,9 @@
 //                "none":   Each bit gets initialized with 1'bx. (default)
 // - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with
 //                the instantiated parameters and signal widths.
+// - ImplKey:     Key by which an instance can refer to a specific implementation (e.g. macro).
+//                May be used to look up additional parameters for implementation (e.g. generator,
+//                line width, muxing) in an external reference, such as a configuration file.
 //
 // Ports:
 // - `clk_i`:   Clock
@ -58,6 +61,7 @@ module tc_sram #(
  parameter int unsigned Latency      = 32'd1,    // Latency when the read data is available
  parameter              SimInit      = "none",   // Simulation initialization
  parameter bit          PrintSimCfg  = 1'b0,     // Print configuration
+  parameter              ImplKey      = "none",   // Reference to specific implementation
  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
--- a/vendor/pulp-platform_tech_cells_generic.lock.hjson
+++ b/vendor/pulp-platform_tech_cells_generic.lock.hjson
@ -9,6 +9,6 @@
  upstream:
  {
    url: https://github.com/pulp-platform/tech_cells_generic.git
-    rev: b2a68114302af1d8191ddf34ea0e07b471911866
+    rev: 7968dd6e6180df2c644636bc6d2908a49f2190cf
  }
 }
--- a/vendor/pulp-platform_tech_cells_generic.vendor.hjson
+++ b/vendor/pulp-platform_tech_cells_generic.vendor.hjson
@ -15,7 +15,7 @@
        // URL
        url: "https://github.com/pulp-platform/tech_cells_generic.git",
        // revision
-        rev: "b2a68114302af1d8191ddf34ea0e07b471911866",
+        rev: "v0.2.13",
    }

    // Patch dir for local changes