👾 Fix timing loop and unpacked assign

2025-04-20 04:07:36 -04:00 · 2018-04-18 18:53:18 +02:00 · 2018-04-18 18:53:18 +02:00 · 23037ff55f
commit 23037ff55f
parent 71f407b65d
10 changed files with 166 additions and 79 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -3,7 +3,7 @@
 	url = https://github.com/pulp-platform/riscv-torture.git
 [submodule "tb"]
 	path = tb
-	url = https://github.com/pulp-platform/uvm-components.git
+	url = https://github.com/stmach/uvm-components.git
 [submodule "src/axi_mem_if"]
 	path = src/axi_mem_if
 	url = https://github.com/pulp-platform/axi_mem_if.git
--- a/Bender.yml
+++ b/Bender.yml
@ -12,6 +12,40 @@ dependencies:
  common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.1.0 }

 sources:
+  - src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/control_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/div_sqrt_mvp_wrapper.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/div_sqrt_top_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/iteration_div_sqrt_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/norm_div_sqrt_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/nrbd_nrsc_mvp.sv
+  - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/preprocess_mvp.sv
+  - src/fpnew/src/pkg/fpnew_pkg.vhd
+  - src/fpnew/src/pkg/fpnew_fmts_pkg.vhd
+  - src/fpnew/src/pkg/fpnew_comps_pkg.vhd
+  - src/fpnew/src/pkg/fpnew_pkg_constants.vhd
+  - src/fpnew/src/utils/fp_pipe.vhd
+  - src/fpnew/src/utils/fp_rounding.vhd
+  - src/fpnew/src/utils/fp_arbiter.vhd
+  - src/fpnew/src/ops/fma_core.vhd
+  - src/fpnew/src/ops/fp_fma.vhd
+  - src/fpnew/src/ops/fp_divsqrt_multi.vhd
+  - src/fpnew/src/ops/fp_noncomp.vhd
+  - src/fpnew/src/ops/fp_f2fcasts.vhd
+  - src/fpnew/src/ops/fp_f2icasts.vhd
+  - src/fpnew/src/ops/fp_i2fcasts.vhd
+  - src/fpnew/src/ops/fp_conv_multi.vhd
+  - src/fpnew/src/subunits/addmul_fmt_slice.vhd
+  - src/fpnew/src/subunits/addmul_block.vhd
+  - src/fpnew/src/subunits/divsqrt_multifmt_slice.vhd
+  - src/fpnew/src/subunits/divsqrt_block.vhd
+  - src/fpnew/src/subunits/noncomp_fmt_slice.vhd
+  - src/fpnew/src/subunits/noncomp_block.vhd
+  - src/fpnew/src/subunits/conv_multifmt_slice.vhd
+  - src/fpnew/src/subunits/conv_block.vhd
+  - src/fpnew/src/fpnew.vhd
+  - src/fpnew/src/fpnew_top.vhd
  - include/ariane_pkg.sv
  - include/nbdcache_pkg.sv
  - target: not(synthesis)
@ -19,6 +53,7 @@ sources:
      - src/util/instruction_tracer_pkg.sv
      - src/util/instruction_tracer_if.sv
  - src/alu.sv
+  - src/fpu_wrap.sv
  - src/ariane.sv
  - src/branch_unit.sv
  - src/cache_ctrl.sv
@ -58,3 +93,5 @@ sources:
  - src/store_buffer.sv
  - src/store_unit.sv
  - src/tlb.sv
+
+
--- a/src/ariane_regfile.sv
+++ b/src/ariane_regfile.sv
@ -23,7 +23,7 @@
 //                 latches and is thus smaller than the flip-flop based RF.
 //

-module ariane_regfile_latch #(
+module ariane_regfile #(
  parameter int unsigned DATA_WIDTH     = 32,
  parameter int unsigned NR_READ_PORTS  = 2,
  parameter int unsigned NR_WRITE_PORTS = 2,
@ -46,7 +46,7 @@ module ariane_regfile_latch #(
    localparam ADDR_WIDTH = 5;;
    localparam NUM_WORDS  = 2**ADDR_WIDTH;

-    logic [NUM_WORDS-1:1]                      mem_clocks;
+    logic [NUM_WORDS-1:ZERO_REG_ZERO]          mem_clocks;

    logic [DATA_WIDTH-1:0]                     mem[NUM_WORDS];
    logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1]  waddr_onehot,waddr_onehot_q;
--- a/src/commit_stage.sv
+++ b/src/commit_stage.sv
@ -105,17 +105,17 @@ module commit_stage #(
                    else // if the LSU buffer is not ready - do not commit, wait
                        commit_ack_o[0] = 1'b0;
                end
+
+                // ---------
+                // FPU Flags
+                // ---------
+                if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
+                    // write the CSR with potential exception flags from retiring floating point instruction
+                    csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]};
+                    csr_write_fflags_o = 1'b1;
+                end
            end

-            // ---------
-            // FPU Flags
-            // ---------
-            if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
-                // write the CSR with potential exception flags from retiring floating point instruction
-                csr_op_o = CSR_SET;
-                csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]};
-                csr_write_fflags_o = 1'b1;
-            end

            // ---------
            // CSR Logic
@ -174,7 +174,6 @@ module commit_stage #(
                // additionally check if we are retiring an FPU instruction because we need to make sure that we write all
                // exception flags
                if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
-                    csr_op_o = CSR_SET;
                    if (csr_write_fflags_o)
                        csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])};
                    else
--- a/src/csr_regfile.sv
+++ b/src/csr_regfile.sv
@ -45,7 +45,7 @@ module csr_regfile #(
    input  logic  [11:0]          csr_addr_i,                 // Address of the register to read/write
    input  logic  [63:0]          csr_wdata_i,                // Write data in
    output logic  [63:0]          csr_rdata_o,                // Read data out
-    input  logic                  csr_write_fflags_i,         // Write fflags register
+    input  logic                  csr_write_fflags_i,         // Write fflags register (asserted when a floating-point instruction is retiring)
    input  logic  [63:0]          pc_i,                       // PC of instruction accessing the CSR
    output exception_t            csr_exception_o,            // attempts to access a CSR without appropriate privilege
                                                              // level or to write  a read-only register also
@ -98,7 +98,7 @@ module csr_regfile #(
    // Assignments
    // ----------------
    // Debug MUX and fflags register
-    assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : (csr_write_fflags_i) ? CSR_FFLAGS : csr_addr_i));
+    assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : csr_addr_i));
    // Output the read data directly
    assign debug_csr_rdata_o = csr_rdata;

@ -431,6 +431,11 @@ module csr_regfile #(
                default: update_access_exception = 1'b1;
            endcase
        end
+
+        // accumulate floating-point exception flags: OR the new flags into the existing fflags
+        if (csr_write_fflags_i)
+            fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags;
+
        // ---------------------
        // External Interrupts
        // ---------------------
@ -677,9 +682,6 @@ module csr_regfile #(
                csr_exception_o.valid = 1'b1;
            end
        end
-        // in case we are writing the CSR flag no exception can ever occur, don't set the valid flag in that case
-        if (csr_write_fflags_i)
-            csr_exception_o.valid = 1'b0;

        // -------------------
        // Wait for Interrupt
--- a/src/ff1.sv
+++ b/src/ff1.sv
@ -1,44 +1,53 @@
-// Copyright 2018 ETH Zurich and University of Bologna.
-// Copyright and related rights are licensed under the Solderpad Hardware
-// License, Version 0.51 (the "License"); you may not use this file except in
-// compliance with the License.  You may obtain a copy of the License at
-// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-// or agreed to in writing, software, hardware and materials distributed under
-// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+// All rights reserved.
 //
-// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
-// Date: 05.06.2017
-// Description: Finds first one
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.

-// -----------------
-// Find First One
-// -----------------
-module ff1 #(
-    parameter int unsigned LEN = 32
+
+/// A leading-one finder / leading zero counter.
+/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
+/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
+module find_first_one #(
+    /// The width of the input vector.
+    parameter int WIDTH = -1,
+    parameter int FLIP = 0
 )(
-    input  logic [LEN-1:0]         in_i,
-    output logic [$clog2(LEN)-1:0] first_one_o,
-    output logic                   no_ones_o
+    input  logic [WIDTH-1:0]         in_i,
+    output logic [$clog2(WIDTH)-1:0] first_one_o,
+    output logic                     no_ones_o
 );

-localparam int unsigned NUM_LEVELS = $clog2(LEN);
+    localparam int NUM_LEVELS = $clog2(WIDTH);

-logic [LEN-1:0] [NUM_LEVELS-1:0]           index_lut;
-logic [2**NUM_LEVELS-1:0]                  sel_nodes;
-logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes;
-
-// ----------------------------
-// Generate Tree Structure
-// ----------------------------
-generate
-    for (genvar j = 0; j < LEN; j++) begin
-        assign index_lut[j] = $unsigned(j[NUM_LEVELS-1:0]);
+    // pragma translate_off
+    initial begin
+        assert(WIDTH >= 0);
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;
+    logic [2**NUM_LEVELS-1:0]                  sel_nodes;
+    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes;
+
+    logic [WIDTH-1:0] in_tmp;
+
+    for (genvar i = 0; i < WIDTH; i++) begin
+        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
+    end
+
+    for (genvar j = 0; j < WIDTH; j++) begin
+        assign index_lut[j] = j;
    end
-endgenerate

-generate
    for (genvar level = 0; level < NUM_LEVELS; level++) begin

        if (level < NUM_LEVELS-1) begin
@ -52,29 +61,25 @@ generate
        if (level == NUM_LEVELS-1) begin
            for (genvar k = 0; k < 2**level; k++) begin
                // if two successive indices are still in the vector...
-                if (k * 2 < LEN) begin
-                    assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
-                    assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
+                if (k * 2 < WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
+                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                end
                // if only the first index is still in the vector...
-                if (k * 2 == LEN) begin
-                    assign sel_nodes[2**level-1+k]   = in_i[k*2];
+                if (k * 2 == WIDTH-1) begin
+                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
                    assign index_nodes[2**level-1+k] = index_lut[k*2];
                end
                // if index is out of range
-                if (k * 2 > LEN) begin
+                if (k * 2 > WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = 1'b0;
                    assign index_nodes[2**level-1+k] = '0;
                end
            end
        end
    end
-endgenerate

-// --------------------
-// Connect Output
-// --------------------
-assign first_one_o = index_nodes[0];
-assign no_ones_o   = ~sel_nodes[0];
+    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
+    assign no_ones_o   = NUM_LEVELS > 0 ? ~sel_nodes[0]  : '1;

-endmodule
+endmodule
--- a/src/fpu_wrap.sv
+++ b/src/fpu_wrap.sv
@ -75,7 +75,37 @@ module fpu_wrap (
    logic [IFMTBITS-1:0] IFMT_INT64;

    // bind the constants from the fpnew entity
-    fpnew_pkg_constants i_fpnew_constants ( .* );
+    fpnew_pkg_constants i_fpnew_constants (
+        .OP_NUMBITS   ( OP_NUMBITS   ),
+        .OP_FMADD     ( OP_FMADD     ),
+        .OP_FNMSUB    ( OP_FNMSUB    ),
+        .OP_ADD       ( OP_ADD       ),
+        .OP_MUL       ( OP_MUL       ),
+        .OP_DIV       ( OP_DIV       ),
+        .OP_SQRT      ( OP_SQRT      ),
+        .OP_SGNJ      ( OP_SGNJ      ),
+        .OP_MINMAX    ( OP_MINMAX    ),
+        .OP_CMP       ( OP_CMP       ),
+        .OP_CLASS     ( OP_CLASS     ),
+        .OP_F2I       ( OP_F2I       ),
+        .OP_I2F       ( OP_I2F       ),
+        .OP_F2F       ( OP_F2F       ),
+        .OP_CPK       ( OP_CPK       ),
+        .FMT_NUMBITS  ( FMT_NUMBITS  ),
+        .FMT_FP32     ( FMT_FP32     ),
+        .FMT_FP64     ( FMT_FP64     ),
+        .FMT_FP16     ( FMT_FP16     ),
+        .FMT_FP8      ( FMT_FP8      ),
+        .FMT_FP16ALT  ( FMT_FP16ALT  ),
+        .FMT_CUST1    ( FMT_CUST1    ),
+        .FMT_CUST2    ( FMT_CUST2    ),
+        .FMT_CUST3    ( FMT_CUST3    ),
+        .IFMT_NUMBITS ( IFMT_NUMBITS ),
+        .IFMT_INT8    ( IFMT_INT8    ),
+        .IFMT_INT16   ( IFMT_INT16   ),
+        .IFMT_INT32   ( IFMT_INT32   ),
+        .IFMT_INT64   ( IFMT_INT64   )
+    );

    // always_comb begin
    //     assert (OPBITS >= OP_NUMBITS) else $error("OPBITS is smaller than %0d", OP_NUMBITS);
--- a/src/icache.sv
+++ b/src/icache.sv
@ -410,8 +410,8 @@ module icache #(
            ready_o = 1'b0;
    end

-    ff1 #(
-        .LEN ( SET_ASSOCIATIVITY )
+    find_first_one #(
+        .WIDTH ( SET_ASSOCIATIVITY )
    ) i_ff1 (
        .in_i        ( ~way_valid    ),
        .first_one_o ( repl_invalid  ),
--- a/src/issue_read_operands.sv
+++ b/src/issue_read_operands.sv
@ -367,6 +367,16 @@ module issue_read_operands #(
    // Integer Register File
    // ----------------------
    logic [1:0][63:0] rdata;
+    logic [1:0][4:0]  raddr_pack;
+
+    // pack signals
+    logic [NR_COMMIT_PORTS-1:0][4:0]  waddr_pack;
+    logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack;
+    logic [NR_COMMIT_PORTS-1:0]       we_pack;
+    assign raddr_pack = {issue_instr_i.rs2[4:0], raddr_a};
+    assign waddr_pack = {waddr_i[1], waddr};
+    assign wdata_pack = {wdata_i[1], wdata};
+    assign we_pack    = {we_gpr_i[1], we};

    ariane_regfile #(
        .DATA_WIDTH     ( 64              ),
@ -374,11 +384,11 @@ module issue_read_operands #(
        .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
        .ZERO_REG_ZERO  ( 1               )
    ) i_ariane_regfile (
-        .raddr_i   ( '{issue_instr_i.rs2[4:0], raddr_a} ),
-        .rdata_o   ( rdata                              ),
-        .waddr_i   ( '{waddr_i[1], waddr}               ),
-        .wdata_i   ( '{wdata_i[1], wdata}               ),
-        .we_i      ( '{we_gpr_i[1], we}                 ),
+        .raddr_i   ( raddr_pack ),
+        .rdata_o   ( rdata      ),
+        .waddr_i   ( waddr_pack ),
+        .wdata_i   ( wdata_pack ),
+        .we_i      ( we_pack    ),
        .*
    );

@ -387,6 +397,10 @@ module issue_read_operands #(
    // -----------------------------
    logic [2:0][FLEN-1:0] fprdata;

+    // pack signals
+    logic [2:0][4:0]  fp_raddr_pack;
+    assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
+
    generate
        if (FP_PRESENT) begin : float_regfile_gen
            ariane_regfile #(
@ -395,11 +409,11 @@ module issue_read_operands #(
                .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
                .ZERO_REG_ZERO  ( 0               )
            ) i_ariane_fp_regfile (
-                .raddr_i   ( '{issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]} ),
-                .rdata_o   ( fprdata                                                                      ),
-                .waddr_i   ( waddr_i                                                                      ),
-                .wdata_i   ( '{wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]}                                ),
-                .we_i      ( we_fpr_i                                                                     ),
+                .raddr_i   ( fp_raddr_pack ),
+                .rdata_o   ( fprdata       ),
+                .waddr_i   ( waddr_pack    ),
+                .wdata_i   ( wdata_pack    ),
+                .we_i      ( we_fpr_i      ),
                .*
            );
        end else begin : no_fpr_gen
--- a/src/mult.sv
+++ b/src/mult.sv
@ -141,8 +141,8 @@ module mult (
    // Find First one
    // ---------------------
    // this unit is used to speed up the sequential division by shifting the dividend first
-    ff1 #(
-        .LEN         ( 64         )
+    find_first_one #(
+        .WIDTH       ( 64         )
    ) i_ff1 (
        .in_i        ( ff1_input  ), // signed = operand_b_rev_neg, unsigned operand_b_rev
        .first_one_o ( ff1_result ),