Cvfpu from vendor to submodule (#2070)

This commit is contained in:
JeanRochCoulon 2024-04-23 14:54:42 +02:00 committed by GitHub
parent 3515908315
commit 3ecabdb95a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 23 additions and 10589 deletions

3
.gitmodules vendored
View file

@ -37,6 +37,9 @@
[submodule "verif/core-v-verif"]
path = verif/core-v-verif
url = https://github.com/openhwgroup/core-v-verif
[submodule "core/cvfpu"]
path = core/cvfpu
url = https://github.com/openhwgroup/cvfpu.git
[submodule "core/cache_subsystem/hpdcache"]
path = core/cache_subsystem/hpdcache
url = https://github.com/openhwgroup/cv-hpdcache.git

View file

@ -37,25 +37,25 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv
+incdir+${CVA6_REPO_DIR}/common/local/util/
// Floating point unit
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_top.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_pkg.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_cast_multi.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_classifier.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_divsqrt_multi.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_fma_multi.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_fma.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_noncomp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_block.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_fmt_slice.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_rounding.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_top.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
${CVA6_REPO_DIR}/core/include/config_pkg.sv
${CVA6_REPO_DIR}/core/include/${TARGET_CFG}_config_pkg.sv

1
core/cvfpu Submodule

@ -0,0 +1 @@
Subproject commit 3116391bf66660f806b45e212b9949c528b4e270

View file

@ -1,3 +0,0 @@
*~
html
Bender.lock

View file

@ -1,33 +0,0 @@
cff-version: 1.2.0
message: "If you use FPnew, please cite it as below."
authors:
- family-names: "Mach"
given-names: "Stefan"
orcid: "https://orcid.org/0000-0002-3476-8857"
title: "FPnew: - New Floating-Point Unit with Transprecision Capabilities"
version: 0.6.6
url: "https://github.com/pulp-platform/fpnew"
preferred-citation:
type: article
authors:
- family-names: "Mach"
given-names: "Stefan"
orcid: "https://orcid.org/0000-0002-3476-8857"
- family-names: "Schuiki"
given-names: "Fabian"
orcid: "https://orcid.org/0000-0002-9923-5031"
- family-names: "Zaruba"
given-names: "Florian"
orcid: "https://orcid.org/0000-0002-8194-6521"
- family-names: "Benini"
given-names: "Luca"
orcid: "https://orcid.org/0000-0001-8068-3806"
doi: "10.1109/TVLSI.2020.3044752"
journal: "IEEE Transactions on Very Large Scale Integration (VLSI) Systems"
month: 12
start: 774
end: 787
title: "FPnew: An Open-Source Multiformat Floating-Point Unit Architecture for Energy-Proportional Transprecision Computing"
issue: 4
volume: 29
year: 2020

View file

@ -1,176 +0,0 @@
SOLDERPAD HARDWARE LICENSE version 0.51
This license is based closely on the Apache License Version 2.0, but is not
approved or endorsed by the Apache Foundation. A copy of the non-modified
Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
As this license is not currently OSI or FSF approved, the Licensor permits any
Work licensed under this License, at the option of the Licensee, to be treated
as licensed under the Apache License Version 2.0 (which is so approved).
This License is licensed under the terms of this License and in particular
clause 7 below (Disclaimer of Warranties) applies in relation to its use.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
“License” shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.
“Licensor” shall mean the Rights owner or entity authorized by the Rights owner
that is granting the License.
“Legal Entity” shall mean the union of the acting entity and all other entities
that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, “control” means (i) the power, direct or
indirect, to cause the direction or management of such entity, whether by
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
“You” (or “Your”) shall mean an individual or Legal Entity exercising
permissions granted by this License.
“Rights” means copyright and any similar right including design right (whether
registered or unregistered), semiconductor topography (mask) rights and
database rights (but excluding Patents and Trademarks).
“Source” form shall mean the preferred form for making modifications, including
but not limited to source code, net lists, board layouts, CAD files,
documentation source, and configuration files.
“Object” form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object
code, generated documentation, the instantiation of a hardware design and
conversions to other media types, including intermediate forms such as
bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
works).
“Work” shall mean the work of authorship, whether in Source form or other
Object form, made available under the License, as indicated by a Rights notice
that is included in or attached to the work (an example is provided in the
Appendix below).
“Derivative Works” shall mean any work, whether in Source or Object form, that
is based on (or derived from) the Work and for which the editorial revisions,
annotations, elaborations, or other modifications represent, as a whole, an
original work of authorship. For the purposes of this License, Derivative Works
shall not include works that remain separable from, or merely link (or bind by
name) or physically connect to or interoperate with the interfaces of, the Work
and Derivative Works thereof.
“Contribution” shall mean any design or work of authorship, including the
original version of the Work and any modifications or additions to that Work or
Derivative Works thereof, that is intentionally submitted to Licensor for
inclusion in the Work by the Rights owner or by an individual or Legal Entity
authorized to submit on behalf of the Rights owner. For the purposes of this
definition, “submitted” means any form of electronic, verbal, or written
communication sent to the Licensor or its representatives, including but not
limited to communication on electronic mailing lists, source code control
systems, and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but excluding
communication that is conspicuously marked or otherwise designated in writing
by the Rights owner as “Not a Contribution.”
“Contributor” shall mean Licensor and any individual or Legal Entity on behalf
of whom a Contribution has been received by Licensor and subsequently
incorporated within the Work.
2. Grant of License. Subject to the terms and conditions of this License, each
Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable license under the Rights to reproduce,
prepare Derivative Works of, publicly display, publicly perform, sublicense,
and distribute the Work and such Derivative Works in Source or Object form and
do anything in relation to the Work as if the Rights did not exist.
3. Grant of Patent License. Subject to the terms and conditions of this
License, each Contributor hereby grants to You a perpetual, worldwide,
non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
section) patent license to make, have made, use, offer to sell, sell, import,
and otherwise transfer the Work, where such license applies only to those
patent claims licensable by such Contributor that are necessarily infringed by
their Contribution(s) alone or by combination of their Contribution(s) with the
Work to which such Contribution(s) was submitted. If You institute patent
litigation against any entity (including a cross-claim or counterclaim in a
lawsuit) alleging that the Work or a Contribution incorporated within the Work
constitutes direct or contributory patent infringement, then any patent
licenses granted to You under this License for that Work shall terminate as of
the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or
Derivative Works thereof in any medium, with or without modifications, and in
Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy
of this License; and
You must cause any modified files to carry prominent notices stating that
You changed the files; and
You must retain, in the Source form of any Derivative Works that You
distribute, all copyright, patent, trademark, and attribution notices from
the Source form of the Work, excluding those notices that do not pertain to
any part of the Derivative Works; and
If the Work includes a “NOTICE” text file as part of its distribution, then
any Derivative Works that You distribute must include a readable copy of
the attribution notices contained within such NOTICE file, excluding those
notices that do not pertain to any part of the Derivative Works, in at
least one of the following places: within a NOTICE text file distributed as
part of the Derivative Works; within the Source form or documentation, if
provided along with the Derivative Works; or, within a display generated by
the Derivative Works, if and wherever such third-party notices normally
appear. The contents of the NOTICE file are for informational purposes only
and do not modify the License. You may add Your own attribution notices
within Derivative Works that You distribute, alongside or as an addendum to
the NOTICE text from the Work, provided that such additional attribution
notices cannot be construed as modifying the License. You may add Your own
copyright statement to Your modifications and may provide additional or
different license terms and conditions for use, reproduction, or
distribution of Your modifications, or for any such Derivative Works as a
whole, provided Your use, reproduction, and distribution of the Work
otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any
Contribution intentionally submitted for inclusion in the Work by You to the
Licensor shall be under the terms and conditions of this License, without any
additional terms or conditions. Notwithstanding the above, nothing herein shall
supersede or modify the terms of any separate license agreement you may have
executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names,
trademarks, service marks, or product names of the Licensor, except as required
for reasonable and customary use in describing the origin of the Work and
reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
writing, Licensor provides the Work (and each Contributor provides its
Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied, including, without limitation, any warranties
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any risks
associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in
tort (including negligence), contract, or otherwise, unless required by
applicable law (such as deliberate and grossly negligent acts) or agreed to in
writing, shall any Contributor be liable to You for damages, including any
direct, indirect, special, incidental, or consequential damages of any
character arising as a result of this License or out of the use or inability to
use the Work (including but not limited to damages for loss of goodwill, work
stoppage, computer failure or malfunction, or any and all other commercial
damages or losses), even if such Contributor has been advised of the
possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or
Derivative Works thereof, You may choose to offer, and charge a fee for,
acceptance of support, warranty, indemnity, or other liability obligations
and/or rights consistent with this License. However, in accepting such
obligations, You may act only on Your own behalf and on Your sole
responsibility, not on behalf of any other Contributor, and only if You agree
to indemnify, defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason of your
accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

View file

@ -1,174 +0,0 @@
# FPnew - New Floating-Point Unit with Transprecision Capabilities
Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog.
Maintainer: Luca Bertaccini <lbertaccini@iis.ee.ethz.ch>
Principal Author: Stefan Mach <smach@iis.ee.ethz.ch>
## Features
The FPU is a parametric design that allows generating FP hardware units for various use cases.
Even though mainly designed for use in RISC-V processors, the FPU or its sub-blocks can easily be utilized in other environments.
Our design aims to be compliant with IEEE 754-2008 and provides the following features:
### Formats
Any IEEE 754-2008 style binary floating-point format can be supported, including single-, double-, quad- and half-precision (`binary32`, `binary64`, `binary128`, `binary16`).
Formats can be defined with arbitrary number of exponent and mantissa bits through parameters and are always symmetrically biased.
Multiple FP formats can be supported concurrently, and the number of formats supported is not limited.
Multiple integer formats with arbitrary number of bits (as source or destination of conversions) can also be defined.
### Operations
- Addition/Subtraction
- Multiplication
- Fused multiply-add in four flavours (`fmadd`, `fmsub`, `fnmadd`, `fnmsub`)
- Division<sup>1</sup>
- Square root<sup>1</sup>
- Minimum/Maximum<sup>2</sup>
- Comparisons
- Sign-Injections (`copy`, `abs`, `negate`, `copySign` etc.)
- Conversions among all supported FP formats
- Conversions between FP formats and integers (signed & unsigned) and vice versa
- Classification
Multi-format FMA operations (i.e. multiplication in one format, accumulation in another) are optionally supported.
Optionally, *packed-SIMD* versions of all the above operations can be generated for formats narrower than the FPU datapath width.
E.g.: Support for double-precision (64bit) operations and two simultaneous single-precision (32bit) operations.
It is also possible to generate only a subset of operations if e.g. divisions are not needed.
<sup>1</sup>Some compliance issues with IEEE 754-2008 are currently known to exist<br>
<sup>2</sup>Implementing IEEE 754-201x `minimumNumber` and `maximumNumber`, respectively
### Rounding modes
All IEEE 754-2008 rounding modes are supported, namely
- `roundTiesToEven`
- `roundTiesToAway`
- `roundTowardPositive`
- `roundTowardNegative`
- `roundTowardZero`
### Status Flags
All IEEE 754-2008 status flags are supported, namely
- Invalid operation (`NV`)
- Division by zero (`DZ`)
- Overflow (`OF`)
- Underflow (`UF`)
- Inexact (`NX`)
## Getting Started
### Dependencies
FPnew currently depends on the following:
- `lzc` and `rr_arb_tree` from the `common_cells` repository (https://github.com/pulp-platform/common_cells.git)
- optional: Divider and square-root unit from the `fpu-div-sqrt-mvp` repository (https://github.com/pulp-platform/fpu_div_sqrt_mvp.git)
These two repositories are included in the source code directory as git submodules, use
```bash
git submodule update --init --recursive
```
if you want to load these dependencies there.
Consider using [Bender](https://github.com/fabianschuiki/bender.git) for managing dependencies in your projects. FPnew comes with Bender support!
### Usage
The top-level module of the FPU is called `fpnew_top` and can be directly instantiated in your design.
Make sure you compile the package `fpnew_pkg` ahead of any files making references to types, parameters or functions defined there.
It is discouraged to `import` all of `fpnew_pkg` into your source files. Instead, explicitly scope references into the package like so: `fpnew_pkg::foo`.
#### Example Instantiation
```SystemVerilog
// FPU instance
fpnew_top #(
.Features ( fpnew_pkg::RV64D ),
.Implementation ( fpnew_pkg::DEFAULT_NOREGS ),
.TagType ( logic )
) i_fpnew_top (
.clk_i,
.rst_ni,
.operands_i,
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.int_fmt_i,
.vectorial_op_i,
.tag_i,
.in_valid_i,
.in_ready_o,
.flush_i,
.result_o,
.status_o,
.tag_o,
.out_valid_o,
.out_ready_i,
.busy_o
);
```
### Documentation
More in-depth documentation on the FPnew configuration, interfaces and architecture is provided in [`docs/README.md`](docs/README.md).
### Issues and Contributing
In case you find any issues with FPnew that have not been reported yet, don't hesitate to open a new [issue](https://github.com/pulp-platform/fpnew/issues) here on GitHub.
Please, don't use the issue tracker for support questions.
Instead, consider contacting the maintainers or consulting the [PULP forums](https://pulp-platform.org/community/index.php).
In case you would like to contribute to the project, please refer to the contributing guidelines in [`docs/CONTRIBUTING.md`](docs/CONTRIBUTING.md) before opening a pull request.
### Repository Structure
HDL source code can be found in the `src` directory while documentation is located in `docs`.
A changelog is kept at [`docs/CHANGELOG.md`](docs/CHANGELOG.md).
This repository loosely follows the [GitFlow](https://nvie.com/posts/a-successful-git-branching-model/) branching model.
This means that the `master` branch is considered stable and used to publish releases of the FPU while the `develop` branch contains features and bugfixes that have not yet been properly released.
Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), as outlined in the [changelog](docs/CHANGELOG.md).
## Licensing
FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [license file](LICENSE) for further information.
## Publication
If you use FPnew in your work, you can cite us:
<details>
<summary>FPnew Publication</summary>
<p>
```
@article{mach2020fpnew,
title={Fpnew: An open-source multiformat floating-point unit architecture for energy-proportional transprecision computing},
author={Mach, Stefan and Schuiki, Fabian and Zaruba, Florian and Benini, Luca},
journal={IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
volume={29},
number={4},
pages={774--787},
year={2020},
publisher={IEEE}
}
```
</p>
</details>
## Acknowledgement
This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631.
For further information, visit [oprecomp.eu](http://oprecomp.eu).
![OPRECOMP](docs/fig/oprecomp_logo_inline1.png)

View file

@ -1,794 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
module fpnew_cast_multi #(
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
// FPU configuration
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter type AuxType = logic,
// Do not change
localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
fpnew_pkg::max_int_width(IntFmtConfig)),
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [WIDTH-1:0] operands_i, // 1 operand
input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input fpnew_pkg::int_format_e int_fmt_i,
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
output logic mask_o,
output AuxType aux_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------
// Constants
// ----------
localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1;
// The internal mantissa includes normal bit or an entire integer
localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
// If needed, there will be a LZC for renormalization
localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
// The internal exponent must be able to represent the smallest denormal input value as signed
// or the number of bits in an integer
localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
// Pipelines
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
: 0); // no regs here otherwise
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
: 0); // no regs here otherwise
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? (NumPipeRegs / 3) // Last to get distributed regs
: 0); // no regs here otherwise
// ---------------
// Input pipeline
// ---------------
// Selected pipeline output signals as non-arrays
logic [WIDTH-1:0] operands_q;
logic [NUM_FORMATS-1:0] is_boxed_q;
logic op_mod_q;
fpnew_pkg::fp_format_e src_fmt_q;
fpnew_pkg::fp_format_e dst_fmt_q;
fpnew_pkg::int_format_e int_fmt_q;
// Input pipeline signals, index i holds signal after i register stages
logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q;
logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q;
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign inp_pipe_operands_q[0] = operands_i;
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
assign inp_pipe_op_q[0] = op_i;
assign inp_pipe_op_mod_q[0] = op_mod_i;
assign inp_pipe_src_fmt_q[0] = src_fmt_i;
assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
assign inp_pipe_int_fmt_q[0] = int_fmt_i;
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
`FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS];
assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS];
assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS];
// -----------------
// Input processing
// -----------------
logic src_is_int, dst_is_int; // if 0, it's a float
assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
logic [NUM_FORMATS-1:0] fmt_sign;
logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info;
logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
logic int_sign;
logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa;
// FP Input initialization
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
if (FpFmtConfig[fmt]) begin : active_format
// Classify input
fpnew_classifier #(
.FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
.NumOperands ( 1 )
) i_fpnew_classifier (
.operands_i ( operands_q[FP_WIDTH-1:0] ),
.is_boxed_i ( is_boxed_q[fmt] ),
.info_o ( info[fmt] )
);
assign fmt_sign[fmt] = operands_q[FP_WIDTH-1];
assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]});
assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad
// Compensation for the difference in mantissa widths used for leading-zero count
assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
end else begin : inactive_format
assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
end
end
// Sign-extend INT input
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
if (IntFmtConfig[ifmt]) begin : active_format // only active formats
always_comb begin : sign_ext_input
// sign-extend value only if it's signed
ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q};
ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0];
end
end else begin : inactive_format
assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
end
end
// Construct input mantissa from integer
assign int_value = ifmt_input_val[int_fmt_q];
assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative
// select mantissa with source format
assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
// --------------
// Normalization
// --------------
logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias
logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased)
logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal
logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa
assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q));
assign src_exp = fmt_exponent[src_fmt_q];
assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
assign src_offset = fmt_shift_compensation[src_fmt_q];
logic input_sign; // input sign
logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
logic mant_is_zero; // for integer zeroes
logic signed [INT_EXP_WIDTH-1:0] fp_input_exp;
logic signed [INT_EXP_WIDTH-1:0] int_input_exp;
// Input mantissa needs to be normalized
logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount
logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations
// Leading-zero counter is needed for renormalization
lzc #(
.WIDTH ( INT_MAN_WIDTH ),
.MODE ( 1 ) // MODE = 1 counts leading zeroes
) i_lzc (
.in_i ( encoded_mant ),
.cnt_o ( renorm_shamt ),
.empty_o ( mant_is_zero )
);
assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt});
// Get the sign from the proper source
assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
// Realign input mantissa, append zeroes if destination is wider
assign input_mant = encoded_mant << renorm_shamt;
// Unbias exponent and compensate for shift
assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias -
renorm_shamt_sgn + src_offset); // compensate for shift
assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
assign input_exp = src_is_int ? int_input_exp : fp_input_exp;
logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
// Rebias the exponent
assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
// ---------------
// Internal pipeline
// ---------------
// Pipeline output signals as non-arrays
logic input_sign_q;
logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
logic [INT_MAN_WIDTH-1:0] input_mant_q;
logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
logic src_is_int_q;
logic dst_is_int_q;
fpnew_pkg::fp_info_t info_q;
logic mant_is_zero_q;
logic op_mod_q2;
fpnew_pkg::roundmode_e rnd_mode_q;
fpnew_pkg::fp_format_e src_fmt_q2;
fpnew_pkg::fp_format_e dst_fmt_q2;
fpnew_pkg::int_format_e int_fmt_q2;
// Internal pipeline signals, index i holds signal after i register stages
logic [0:NUM_MID_REGS] mid_pipe_input_sign_q;
logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q;
logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q;
fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q;
logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q;
logic [0:NUM_MID_REGS] mid_pipe_op_mod_q;
fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q;
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q;
TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
logic [0:NUM_MID_REGS] mid_pipe_mask_q;
AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
logic [0:NUM_MID_REGS] mid_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_MID_REGS] mid_pipe_ready;
// Input stage: First element of pipeline is taken from upstream logic
assign mid_pipe_input_sign_q[0] = input_sign;
assign mid_pipe_input_exp_q[0] = input_exp;
assign mid_pipe_input_mant_q[0] = input_mant;
assign mid_pipe_dest_exp_q[0] = destination_exp;
assign mid_pipe_src_is_int_q[0] = src_is_int;
assign mid_pipe_dst_is_int_q[0] = dst_is_int;
assign mid_pipe_info_q[0] = info[src_fmt_q];
assign mid_pipe_mant_zero_q[0] = mant_is_zero;
assign mid_pipe_op_mod_q[0] = op_mod_q;
assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_src_fmt_q[0] = src_fmt_q;
assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
assign mid_pipe_int_fmt_q[0] = int_fmt_q;
assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
// Input stage: Propagate pipeline ready signal to input pipe
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
// Generate the register stages
// Stage i registers the mid_pipe_* signals at index i into index i+1, so index 0 is the
// unregistered input and index NUM_MID_REGS is the pipeline output. With NUM_MID_REGS == 0 the
// loop generates nothing and the pipeline is purely combinational.
// NOTE(review): `FFL / `FFLARNC are presumably the common_cells/registers.svh macros; FFL takes
// no explicit clock/reset argument here, so it is expected to use clk_i/rst_ni implicitly - confirm.
for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// Data registers only load when reg_ena is high, i.e. bubbles do not toggle the data path.
`FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0)
`FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0)
`FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0)
`FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0)
`FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0)
`FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0)
`FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0)
`FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0)
`FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0)
// Enum-typed registers get an explicitly typed reset value
`FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
// Tag and aux are user-supplied types, hence the casts of the reset value
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0)
`FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS];
assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS];
assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS];
assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS];
assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS];
assign info_q = mid_pipe_info_q[NUM_MID_REGS];
assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS];
assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS];
assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS];
assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS];
// --------
// Casting
// --------
logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments
logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
logic of_before_round, uf_before_round;
// Perform adjustments to mantissa and exponent
// Computes, for the pipelined operation, the exponent to emit (final_exp), the mantissa as fed
// to the right-shifter (preshift_mant), the right-shift amount (denorm_shamt) and the
// pre-rounding overflow/underflow indications. The defaults below describe a regular in-range
// FP result; the branches override them for integer destinations and for FP over-/underflow.
// Later blocking assignments intentionally override earlier ones, so statement order matters.
always_comb begin : cast_value
// Default assignment
final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
preshift_mant = '0; // initialize mantissa container with zeroes
// Default shift drops the bits of the widest mantissa that the destination format cannot hold
denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
of_before_round = 1'b0;
uf_before_round = 1'b0;
// Place mantissa to the left of the shifter
preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
// Handle INT casts
if (dst_is_int_q) begin
// By default right shift mantissa to be an integer
denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
// overflow: when converting to unsigned the range is larger by one
// (op_mod_q2 raises the exponent threshold by one)
if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
denorm_shamt = '0; // prevent shifting
of_before_round = 1'b1;
// underflow
// exponent below -1: the magnitude is too small to contribute an integer or round bit
end else if (input_exp_q < -1) begin
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
uf_before_round = 1'b1;
end
// Handle FP over-/underflows
end else begin
// Overflow or infinities (for proper rounding)
// Triggered when the re-biased exponent reaches the all-ones encoding of the destination
// format, or when a floating-point source operand is infinite
if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
(~src_is_int_q && info_q.is_inf)) begin
final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
preshift_mant = '1; // largest normal value and RS bits set
of_before_round = 1'b1;
// Denormalize underflowing values
// Re-biased exponent in [-man_bits, 0]: result is subnormal, shift further right by the
// distance of the exponent below 1
end else if (destination_exp_q < 1 &&
destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
final_exp = '0; // denormal result
denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
uf_before_round = 1'b1;
// Limit the shift to retain sticky bits
// Re-biased exponent below -man_bits: use a fixed shift instead of an exponent-dependent one
end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
final_exp = '0; // denormal result
denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
uf_before_round = 1'b1;
end
end
end
localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. and R
localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
// Mantissa adjustment shift
assign destination_mant = preshift_mant >> denorm_shamt;
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant, fp_round_sticky_bits[1]} =
destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1];
// Collapse sticky bits
assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]});
assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
// select RS bits for destination operation
assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
// ----------------------------
// Rounding and classification
// ----------------------------
logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd
logic of_after_round; // overflow
logic uf_after_round; // underflow
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
logic [NUM_FORMATS-1:0] fmt_of_after_round;
logic [NUM_FORMATS-1:0] fmt_uf_after_round;
logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round;
logic rounded_sign;
logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
logic result_true_zero;
logic [WIDTH-1:0] rounded_int_res; // after possible inversion
logic rounded_int_res_zero; // after rounding
// Pack exponent and mantissa into proper rounding form
// One generate iteration per FP format: concatenates the low EXP_BITS of final_exp with the low
// MAN_BITS of final_mant into that format's pre-rounding magnitude (no sign bit). The entry for
// the destination format is selected after this loop.
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
// Set up some constants
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
if (FpFmtConfig[fmt]) begin : active_format
always_comb begin : assemble_result
// The concatenation is EXP_BITS+MAN_BITS wide; the assignment zero-extends it to WIDTH bits
fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Sign-extend integer result
// One generate iteration per integer format: takes the low INT_WIDTH bits of final_int and
// replicates bit INT_WIDTH-1 into all upper bits of the WIDTH-wide entry.
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
if (IntFmtConfig[ifmt]) begin : active_format
always_comb begin : assemble_result
// sign-extend result: fill everything with the MSB first ...
ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]};
// ... then overwrite the low bits with the actual value
ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Select output with destination format and operation
assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
fpnew_rounding #(
.AbsWidth ( WIDTH )
) i_fpnew_rounding (
.abs_value_i ( pre_round_abs ),
.sign_i ( input_sign_q ), // source format
.round_sticky_bits_i ( round_sticky_bits ),
.rnd_mode_i ( rnd_mode_q ),
.effective_subtraction_i ( 1'b0 ), // no operation happened
.abs_rounded_o ( rounded_abs ),
.sign_o ( rounded_sign ),
.exact_zero_o ( result_true_zero )
);
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
// Detect overflows and inject sign
// One generate iteration per FP format: classifies the rounded magnitude by its exponent field
// and builds the signed, NaN-boxed result word for that format.
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
if (FpFmtConfig[fmt]) begin : active_format
always_comb begin : post_process
// detect of / uf
// exponent field all zeros after rounding -> result is zero or denormal
fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
// exponent field all ones after rounding -> rounding carried into the infinity encoding
fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
// Assemble regular result, nan box short ones. Int zeroes need to be detected
// Upper bits above FP_WIDTH stay all ones (NaN-box); the low FP_WIDTH bits are overwritten
fmt_result[fmt] = '1;
// An integer source with an all-zero mantissa yields an all-zero word, otherwise the
// rounded sign is prepended to the rounded exponent/mantissa
fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
? '0
: {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Negative integer result needs to be brought into two's complement
assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
assign rounded_int_res_zero = (rounded_int_res == '0);
// Detect integer overflows after rounding (only positives)
// One generate iteration per integer format. Overflow caused by rounding is only checked for
// non-negative results whose exponent sits exactly at the largest value that passed the
// pre-rounding overflow check (INT_WIDTH-2, or INT_WIDTH-1 when op_mod_q2 is set).
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
if (IntFmtConfig[ifmt]) begin : active_format
always_comb begin : detect_overflow
// Default: no overflow
ifmt_of_after_round[ifmt] = 1'b0;
// Int result can overflow if we're at the max exponent
if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin
// Check whether the rounded MSB differs from unrounded MSB
// (at this exponent the bit is expected to be set; if it is clear, rounding carried out)
ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2];
end
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE;
end
end
// Classification after rounding select by destination format
assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
assign of_after_round = dst_is_int_q ? ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2];
// -------------------------
// FP Special case handling
// -------------------------
logic [WIDTH-1:0] fp_special_result;
fpnew_pkg::status_t fp_special_status;
logic fp_result_is_special;
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
// Special result construction
// One generate iteration per FP format: builds the value returned when the FP result is flagged
// as special. Only two values are produced here: a zero carrying the input sign when the source
// is classified as zero, and a positive quiet NaN in every other case.
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
// qNaN encoding: exponent all ones, only the mantissa MSB set
localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
if (FpFmtConfig[fmt]) begin : active_format
always_comb begin : special_results
logic [FP_WIDTH-1:0] special_res;
// Zero source: place the input sign in the MSB, all other bits zero; otherwise emit the qNaN
special_res = info_q.is_zero
? input_sign_q << FP_WIDTH-1 // signed zero
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
// Initialize special result with ones (NaN-box)
fmt_special_result[fmt] = '1;
// Overwrite the low FP_WIDTH bits, upper bits keep the NaN-box ones
fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Detect special case from source format, I2F casts don't produce a special result
assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
info_q.is_nan |
~info_q.is_boxed);
// Signalling input NaNs raise invalid flag, otherwise no flags set
assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
// Assemble result according to destination format
assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
// --------------------------
// INT Special case handling
// --------------------------
logic [WIDTH-1:0] int_special_result;
fpnew_pkg::status_t int_special_status;
logic int_result_is_special;
logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
// Special result construction
// One generate iteration per integer format: builds the saturated value returned when the
// integer result is flagged as special.
//   positive or NaN source : low INT_WIDTH-1 bits all ones, MSB = op_mod_q2
//   negative non-NaN source: bitwise inverse of the above
// The INT_WIDTH-bit value is then sign-extended to WIDTH bits.
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
if (IntFmtConfig[ifmt]) begin : active_format
always_comb begin : special_results
automatic logic [INT_WIDTH-1:0] special_res;
// Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1
special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
// Negative special case (except for nans) tie to -max or 0
// Inverting gives 100..0 when the MSB was clear and 000..0 when it was set
if (input_sign_q && !info_q.is_nan)
special_res = ~special_res;
// Initialize special result with sign-extension
ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]};
ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
end
end else begin : inactive_format
// Disabled formats are tied to DONT_CARE
assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special = info_q.is_nan | info_q.is_inf |
of_before_round | of_after_round | ~info_q.is_boxed |
(input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
// All integer special cases are invalid
assign int_special_status = '{NV: 1'b1, default: 1'b0};
// Assemble result according to destination format
assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
// -----------------
// Result selection
// -----------------
fpnew_pkg::status_t int_regular_status, fp_regular_status;
logic [WIDTH-1:0] fp_result, int_result;
fpnew_pkg::status_t fp_status, int_status;
assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX;
assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
: (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0};
assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2];
assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status;
assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
assign int_status = int_result_is_special ? int_special_status : int_regular_status;
// Final results for output pipeline
logic [WIDTH-1:0] result_d;
fpnew_pkg::status_t status_d;
logic extension_bit;
// Select output depending on special case detection
assign result_d = dst_is_int_q ? int_result : fp_result;
assign status_d = dst_is_int_q ? int_status : fp_status;
// MSB of int result decides extension, otherwise NaN box
assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
// ----------------
// Output Pipeline
// ----------------
// Output pipeline signals, index i holds signal after i register stages
logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q;
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_OUT_REGS] out_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_ext_bit_q[0] = extension_bit;
assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
// Input stage: Propagate pipeline ready signal to inside pipe
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
// Generate the register stages
// Stage i registers the out_pipe_* signals at index i into index i+1, so index 0 is the
// unregistered result and index NUM_OUT_REGS drives the module outputs. With NUM_OUT_REGS == 0
// the loop generates nothing.
// NOTE(review): `FFL / `FFLARNC are presumably the common_cells/registers.svh macros; FFL takes
// no explicit clock/reset argument here, so it is expected to use clk_i/rst_ni implicitly - confirm.
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// Data registers only load when reg_ena is high, i.e. bubbles do not toggle the data path.
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
`FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
// Tag and aux are user-supplied types, hence the casts of the reset value
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
// Output stage: assign module outputs
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
endmodule

View file

@ -1,74 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
// Combinational classifier for floating-point operands.
//
// For each of the NumOperands inputs, the operand is split into sign / exponent / mantissa
// according to FpFormat and a set of mutually consistent class flags is produced in info_o:
//   is_normal     - boxed, exponent neither all zeros nor all ones
//   is_subnormal  - boxed, exponent all zeros, value not zero
//   is_zero       - boxed, exponent and mantissa all zeros
//   is_inf        - boxed, exponent all ones, mantissa all zeros
//   is_nan        - not boxed, or exponent all ones with non-zero mantissa
//   is_signalling - boxed NaN whose mantissa MSB is clear
//   is_quiet      - NaN that is not signalling (this includes every non-boxed input)
//   is_boxed      - copy of is_boxed_i
module fpnew_classifier #(
  parameter fpnew_pkg::fp_format_e FpFormat    = fpnew_pkg::fp_format_e'(0),
  parameter int unsigned           NumOperands = 1,
  // Do not change
  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
) (
  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i,
  input  logic                [NumOperands-1:0]            is_boxed_i,
  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o
);

  // Field widths of the selected format
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);

  // Overlay used to slice an operand into its fields
  typedef struct packed {
    logic                sign;
    logic [EXP_BITS-1:0] exponent;
    logic [MAN_BITS-1:0] mantissa;
  } fp_fields_t;

  // One independent classifier per operand
  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values

    fp_fields_t fp_val;

    // Shared field tests, evaluated once and reused by the class flags below
    logic exp_all_zeros;
    logic exp_all_ones;
    logic man_all_zeros;

    // Class flags
    logic is_boxed;
    logic is_normal;
    logic is_inf;
    logic is_nan;
    logic is_signalling;
    logic is_quiet;
    logic is_zero;
    logic is_subnormal;

    // ---------------
    // Classify Input
    // ---------------
    always_comb begin : classify_input
      // Fetch this operand and its boxing indication
      fp_val   = operands_i[op];
      is_boxed = is_boxed_i[op];

      // Field tests
      exp_all_zeros = (fp_val.exponent == '0);
      exp_all_ones  = (fp_val.exponent == '1);
      man_all_zeros = (fp_val.mantissa == '0);

      // Classes that require a properly boxed operand
      is_normal    = is_boxed && !exp_all_zeros && !exp_all_ones;
      is_zero      = is_boxed && exp_all_zeros && man_all_zeros;
      is_subnormal = is_boxed && exp_all_zeros && !is_zero;
      is_inf       = is_boxed && (exp_all_ones && man_all_zeros);

      // A badly boxed operand is always treated as NaN, and then as a quiet one
      is_nan        = !is_boxed || (exp_all_ones && !man_all_zeros);
      is_signalling = is_boxed && is_nan && (fp_val.mantissa[MAN_BITS-1] == 1'b0);
      is_quiet      = is_nan && !is_signalling;

      // Drive the output record for this operand
      info_o[op].is_boxed      = is_boxed;
      info_o[op].is_normal     = is_normal;
      info_o[op].is_subnormal  = is_subnormal;
      info_o[op].is_zero       = is_zero;
      info_o[op].is_inf        = is_inf;
      info_o[op].is_nan        = is_nan;
      info_o[op].is_signalling = is_signalling;
      info_o[op].is_quiet      = is_quiet;
    end
  end
endmodule

View file

@ -1,366 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
module fpnew_divsqrt_multi #(
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
// FPU configuration
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER,
parameter type TagType = logic,
parameter type AuxType = logic,
// Do not change
localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig),
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [1:0][WIDTH-1:0] operands_i, // 2 operands
input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
output logic divsqrt_done_o,
input logic simd_synch_done_i,
output logic divsqrt_ready_o,
input logic simd_synch_rdy_i,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
output logic mask_o,
output AuxType aux_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------
// Constants
// ----------
// Pipelines
localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? (NumPipeRegs / 2) // Last to get distributed regs
: 0); // no regs here otherwise
localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 1) / 2) // First to get distributed regs
: 0); // no regs here otherwise
// ---------------
// Input pipeline
// ---------------
// Selected pipeline output signals as non-arrays
logic [1:0][WIDTH-1:0] operands_q;
fpnew_pkg::roundmode_e rnd_mode_q;
fpnew_pkg::operation_e op_q;
fpnew_pkg::fp_format_e dst_fmt_q;
logic in_valid_q;
// Input pipeline signals, index i holds signal after i register stages
logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign inp_pipe_operands_q[0] = operands_i;
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
assign inp_pipe_op_q[0] = op_i;
assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
// Generate the register stages
// Stage i registers the inp_pipe_* signals at index i into index i+1, so index 0 is the module
// input and index NUM_INP_REGS feeds the logic after the input pipeline. With NUM_INP_REGS == 0
// the loop generates nothing.
// `FFL / `FFLARNC come from the included common_cells/registers.svh.
// NOTE(review): FFL takes no explicit clock/reset argument here, so it is expected to use
// clk_i/rst_ni implicitly - confirm against the macro definition.
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// Data registers only load when reg_ena is high, i.e. bubbles do not toggle the data path.
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
// Enum-typed registers get an explicitly typed reset value
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
`FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
// Tag and aux are user-supplied types, hence the casts of the reset value
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign op_q = inp_pipe_op_q[NUM_INP_REGS];
assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];
// -----------------
// Input processing
// -----------------
logic [1:0] divsqrt_fmt;
logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
logic input_is_fp8;
// Translate fpnew formats into divsqrt formats
always_comb begin : translate_fmt
  // FP8 gets special treatment only when that format is enabled in FpFmtConfig
  input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);

  // Map the fpnew destination format onto the 2-bit format select of the divsqrt unit.
  // Anything that is not FP32/FP64/FP16ALT (i.e. FP16 itself, and also FP8) is run as FP16.
  if (dst_fmt_q == fpnew_pkg::FP32) begin
    divsqrt_fmt = 2'b00;
  end else if (dst_fmt_q == fpnew_pkg::FP64) begin
    divsqrt_fmt = 2'b01;
  end else if (dst_fmt_q == fpnew_pkg::FP16ALT) begin
    divsqrt_fmt = 2'b11;
  end else begin
    divsqrt_fmt = 2'b10;
  end

  // Both operands are handed to the unit in fixed 64-bit containers; an FP8 value is moved
  // up by 8 bits so that it occupies the position of an FP16 value.
  for (int unsigned idx = 0; idx < 2; idx++) begin
    divsqrt_operands[idx] = input_is_fp8 ? operands_q[idx] << 8 : operands_q[idx];
  end
end
// ------------
// Control FSM
// ------------
logic in_ready; // input handshake with upstream
logic div_valid, sqrt_valid; // input signalling with unit
logic unit_ready, unit_done, unit_done_q; // status signals from unit instance
logic op_starting; // high in the cycle a new operation starts
logic out_valid, out_ready; // output handshake with downstream
logic unit_busy; // valid data in flight
// FSM states
typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
fsm_state_e state_q, state_d;
// Ready synch with other lanes
// Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
assign divsqrt_ready_o = in_ready;
// Upstream ready comes from sanitization FSM, and it is synched among all the lanes
assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i;
// Valid synch with other lanes
// When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
// As soon as all the lanes are over, we can clear this FF and start with a new operation
`FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni);
// Tell the other units that this unit has finished now or in the past
assign divsqrt_done_o = unit_done_q | unit_done;
// Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
assign op_starting = div_valid | sqrt_valid;
// FSM to safely apply and receive data from DIVSQRT unit
always_comb begin : flag_fsm
// Combinational next-state and output logic of the control FSM.
//   in_ready  : gates div_valid/sqrt_valid and is exported as divsqrt_ready_o
//   out_valid : offers the result to stage 0 of the output pipeline
//   unit_busy : marks an operation in flight (contributes to busy_o)
//   state_d   : next state, registered into state_q
// Default assignments
in_ready = 1'b0;
out_valid = 1'b0;
unit_busy = 1'b0;
state_d = state_q;
unique case (state_q)
// Waiting for work
IDLE: begin
// NOTE(review): in_ready is raised here regardless of unit_ready, so div_valid/sqrt_valid can
// pulse while the state stays IDLE if the unit is not ready - confirm the unit is always
// ready whenever this FSM is idle.
in_ready = 1'b1; // we're ready
if (in_valid_q && unit_ready) begin // New work arrives
state_d = BUSY; // go into processing state
end
end
// Operation in progress
BUSY: begin
unit_busy = 1'b1; // data in flight
// If all the lanes are done with processing
// (the result is committed on simd_synch_done_i, not on this lane's unit_done alone)
if (simd_synch_done_i) begin
out_valid = 1'b1; // try to commit result downstream
// If downstream accepts our result
if (out_ready) begin
state_d = IDLE; // we anticipate going back to idling..
if (in_valid_q && unit_ready) begin // ..unless new work comes in
in_ready = 1'b1; // we acknowledge the instruction
state_d = BUSY; // and stay busy with it
end
// Otherwise if downstream is not ready for the result
end else begin
state_d = HOLD; // wait for the pipeline to take the data
end
end
end
// Waiting with valid result for downstream
HOLD: begin
unit_busy = 1'b1; // data in flight
out_valid = 1'b1; // try to commit result downstream
// If the result is accepted by downstream
if (out_ready) begin
state_d = IDLE; // go back to idle..
if (in_valid_q && unit_ready) begin // ..unless new work comes in
in_ready = 1'b1; // acknowledge the new transaction
state_d = BUSY; // will be busy with the next instruction
end
end
end
// fall into idle state otherwise
// (state_q is a 2-bit enum with three named states; the unused encoding recovers to IDLE)
default: state_d = IDLE;
endcase
// Flushing overrides the other actions
// in_ready keeps the value computed above; the unit start strobes are masked with ~flush_i
// where div_valid/sqrt_valid are derived.
if (flush_i) begin
unit_busy = 1'b0; // data is invalidated
out_valid = 1'b0; // cancel any valid data
state_d = IDLE; // go to default state
end
end
// FSM status register (asynch active low reset)
`FF(state_q, state_d, IDLE)
// Hold additional information while the operation is in progress
logic result_is_fp8_q;
TagType result_tag_q;
logic result_mask_q;
AuxType result_aux_q;
// Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
`FFL(result_is_fp8_q, input_is_fp8, op_starting, '0)
`FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
`FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
`FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
// -----------------
// DIVSQRT instance
// -----------------
logic [63:0] unit_result;
logic [WIDTH-1:0] adjusted_result, held_result_q;
fpnew_pkg::status_t unit_status, held_status_q;
logic hold_en;
div_sqrt_top_mvp i_divsqrt_lei (
// Iterative div/sqrt unit of this lane. Port roles below are described from how this module
// drives them; the unit's own behaviour is defined in div_sqrt_top_mvp.
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
// Start strobes: only asserted when a valid operation is present, the FSM is ready and
// no flush is active (see div_valid / sqrt_valid)
.Div_start_SI ( div_valid ),
.Sqrt_start_SI ( sqrt_valid ),
// Operands are passed in fixed 64-bit containers (FP8 already moved up by 8 bits)
.Operand_a_DI ( divsqrt_operands[0] ),
.Operand_b_DI ( divsqrt_operands[1] ),
.RM_SI ( rnd_mode_q ),
// Tied to zero - presumably selects the unit's default precision; TODO confirm against the unit
.Precision_ctl_SI ( '0 ),
.Format_sel_SI ( divsqrt_fmt ),
// A pipeline flush is forwarded to the unit's kill input
.Kill_SI ( flush_i ),
.Result_DO ( unit_result ),
.Fflags_SO ( unit_status ),
// unit_ready / unit_done feed the control FSM and the lane-synchronisation logic
.Ready_SO ( unit_ready ),
.Done_SO ( unit_done )
);
// Adjust result width and fix FP8
assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
// Hold the result when one lane has finished execution, except when all the lanes finish together
// and the result can be accepted downstream
assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready);
// The Hold register (load, no reset)
`FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
`FFLNR(held_status_q, unit_status, hold_en, clk_i)
// --------------
// Output Select
// --------------
logic [WIDTH-1:0] result_d;
fpnew_pkg::status_t status_d;
// Prioritize hold register data
assign result_d = unit_done_q ? held_result_q : adjusted_result;
assign status_d = unit_done_q ? held_status_q : unit_status;
// ----------------
// Output Pipeline
// ----------------
// Output pipeline signals, index i holds signal after i register stages
logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_OUT_REGS] out_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_tag_q[0] = result_tag_q;
assign out_pipe_mask_q[0] = result_mask_q;
assign out_pipe_aux_q[0] = result_aux_q;
assign out_pipe_valid_q[0] = out_valid;
// Input stage: Propagate pipeline ready signal to inside pipe
assign out_ready = out_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
// One elastic register stage per iteration: index i is the stage input, index i+1 its
// registered output. With NUM_OUT_REGS == 0 nothing is generated and index 0 drives the
// module outputs directly.
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers.
// flush_i is not an input to these payload registers; only the valid bit above is cleared.
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
// Output stage: assign module outputs
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = 1'b1; // always NaN-Box result
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q});
endmodule

View file

@ -1,690 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
module fpnew_fma #(
parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter type AuxType = logic,
localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
input logic [2:0] is_boxed_i, // 3 operands
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
output logic mask_o,
output AuxType aux_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------
// Constants
// ----------
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat);
// Precision bits 'p' include the implicit bit
localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
// The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
// Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
// datapath leakage. This is either given by the exponent bits or the width of the LZC result.
// In most reasonable FP formats the internal exponent will be wider than the LZC result.
localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
// Shift amount width: maximum internal mantissa size is 3p+4 bits
localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
// Pipelines
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
: 0); // no regs here otherwise
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
: 0); // no regs here otherwise
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? (NumPipeRegs / 3) // Last to get distributed regs
: 0); // no regs here otherwise
// ----------------
// Type definition
// ----------------
typedef struct packed {
logic sign;
logic [EXP_BITS-1:0] exponent;
logic [MAN_BITS-1:0] mantissa;
} fp_t;
// ---------------
// Input pipeline
// ---------------
// Input pipeline signals, index i holds signal after i register stages
logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q;
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign inp_pipe_operands_q[0] = operands_i;
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
assign inp_pipe_op_q[0] = op_i;
assign inp_pipe_op_mod_q[0] = op_mod_i;
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
// One elastic register stage per iteration: index i is the stage input, index i+1 its
// registered output. With NUM_INP_REGS == 0 nothing is generated and index 0 feeds the
// datapath directly.
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers.
// flush_i is not an input to these payload registers; only the valid bit above is cleared.
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// -----------------
// Input processing
// -----------------
fpnew_pkg::fp_info_t [2:0] info_q;
// Classify input
fpnew_classifier #(
.FpFormat ( FpFormat ),
.NumOperands ( 3 )
) i_class_inputs (
// Classifies the three operands taken from the end of the input pipeline. The per-operand
// info_q flags (is_normal, is_subnormal, is_zero, is_inf, is_nan, is_signalling, is_boxed)
// are consumed by the operand adjustment and special-case logic that follows.
.operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
.is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
.info_o ( info_q )
);
fp_t operand_a, operand_b, operand_c;
fpnew_pkg::fp_info_t info_a, info_b, info_c;
// Operation selection and operand adjustment
// | \c op_q | \c op_mod_q | Operation Adjustment
// |:--------:|:-----------:|---------------------
// | FMADD | \c 0 | FMADD: none
// | FMADD | \c 1 | FMSUB: Invert sign of operand C
// | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A
// | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C
// | ADD | \c 0 | ADD: Set operand A to +1.0
// | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C
// | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
// | *others* | \c - | *invalid*
// \note \c op_mod_q always inverts the sign of the addend.
always_comb begin : op_select
// Reshapes every supported operation into the common form (A * B) + C by adjusting the
// operands and their classification info before they enter the datapath.
// Default assignments - packing-order-agnostic
operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
info_a = info_q[0];
info_b = info_q[1];
info_c = info_q[2];
// op_mod_q inverts sign of operand C
// (applied before the case below, so MUL and the default branch overwrite it again)
operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
unique case (inp_pipe_op_q[NUM_INP_REGS])
fpnew_pkg::FMADD: ; // do nothing
fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
fpnew_pkg::ADD: begin // Set multiplicand to +1
// The sum is computed as (1.0 * B) + C: the encoded exponent equals BIAS, mantissa is zero
operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
end
fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN
// RDN selects +0 as the addend, every other rounding mode selects -0
if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
else
operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
end
default: begin // propagate don't cares
// Unsupported operations: all operands and their info are replaced by DONT_CARE
operand_a = '{default: fpnew_pkg::DONT_CARE};
operand_b = '{default: fpnew_pkg::DONT_CARE};
operand_c = '{default: fpnew_pkg::DONT_CARE};
info_a = '{default: fpnew_pkg::DONT_CARE};
info_b = '{default: fpnew_pkg::DONT_CARE};
info_c = '{default: fpnew_pkg::DONT_CARE};
end
endcase
end
// ---------------------
// Input classification
// ---------------------
logic any_operand_inf;
logic any_operand_nan;
logic signalling_nan;
logic effective_subtraction;
logic tentative_sign;
// Reduction for special case handling
assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
// Effective subtraction in FMA occurs when product and addend signs differ
assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
// The tentative sign of the FMA shall be the sign of the product
assign tentative_sign = operand_a.sign ^ operand_b.sign;
// ----------------------
// Special case handling
// ----------------------
fp_t special_result;
fpnew_pkg::status_t special_status;
logic result_is_special;
always_comb begin : special_cases
// Detects operand combinations whose result is fixed by NaN/infinity rules. For those,
// result_is_special is set and special_result / special_status carry the result and flags.
// The branches are prioritised: (inf * 0) first, then any NaN, then remaining infinities.
// Default assignments
// (mantissa value 2**(MAN_BITS-1) sets only the mantissa MSB, i.e. the quiet bit)
special_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
special_status = '0;
result_is_special = 1'b0;
// Handle potentially mixed nan & infinity input => important for the case where infinity and
// zero are multiplied and added to a qnan.
// RISC-V mandates raising the NV exception in these cases:
// (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
special_status.NV = 1'b1; // invalid operation
// NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
end else if (any_operand_nan) begin
result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN
special_status.NV = signalling_nan; // raise the invalid operation flag if signalling
// Special cases involving infinity
end else if (any_operand_inf) begin
result_is_special = 1'b1; // bypass FMA
// Effective addition of opposite infinities (±inf - ±inf) is invalid!
// (special_result keeps its default here, so the output is the canonical qNaN)
if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
special_status.NV = 1'b1; // invalid operation
// Handle cases where output will be inf because of inf product input
else if (info_a.is_inf || info_b.is_inf) begin
// Result is infinity with the sign of the product
special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
// Handle cases where the addend is inf
end else if (info_c.is_inf) begin
// Result is infinity with sign of the addend (= operand_c)
special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
end
end
end
// ---------------------------
// Initial exponent data path
// ---------------------------
logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
logic signed [EXP_WIDTH-1:0] tentative_exponent;
// Zero-extend exponents into signed container - implicit width extension
assign exponent_a = signed'({1'b0, operand_a.exponent});
assign exponent_b = signed'({1'b0, operand_b.exponent});
assign exponent_c = signed'({1'b0, operand_c.exponent});
// Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
// with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased.
assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
// Biased product exponent is the sum of encoded exponents minus the bias.
assign exponent_product = (info_a.is_zero || info_b.is_zero)
? 2 - signed'(BIAS) // in case the product is zero, set minimum exp.
: signed'(exponent_a + info_a.is_subnormal
+ exponent_b + info_b.is_subnormal
- signed'(BIAS));
// Exponent difference is the addend exponent minus the product exponent
assign exponent_difference = exponent_addend - exponent_product;
// The tentative exponent will be the larger of the product or addend exponent
assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
// Shift amount for addend based on exponents (unsigned as only right shifts)
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
always_comb begin : addend_shift_amount
  // Start from the addend-anchored case: the addend is not shifted at all and the product
  // only contributes to the sticky bit.
  addend_shamt = 0;
  if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin
    // Product-anchored case: saturate the shift, the addend ends up entirely in the sticky bit
    addend_shamt = 3 * PRECISION_BITS + 4;
  end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin
    // Overlapping case: addend and product share bit positions, shift by the exact distance
    addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
  end
end
// ------------------
// Product data path
// ------------------
logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c;
logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide
logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R)
// Add implicit bits to mantissae
assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
// Mantissa multiplier (a*b)
assign product = mantissa_a * mantissa_b;
// Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
// | 000...000 | product | RS |
// <- p+2 -> <- 2p -> < 2>
assign product_shifted = product << 2; // constant shift
// -----------------
// Addend data path
// -----------------
logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on
logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky
logic sticky_before_add; // they are compressed into a single sticky bit
logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R)
logic inject_carry_in; // inject carry for subtractions if needed
// In parallel, the addend is right-shifted according to the exponent difference. Up to p bits
// are shifted out and compressed into a sticky bit.
// BEFORE THE SHIFT:
// | mantissa_c | 000..000 |
// <- p -> <- 3p+4 ->
// AFTER THE SHIFT:
// | 000..........000 | mantissa_c | 000...............0GR | sticky bits |
// <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p ->
assign {addend_after_shift, addend_sticky_bits} =
(mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
assign sticky_before_add = (| addend_sticky_bits);
// assign addend_after_shift[0] = sticky_before_add;
// In case of a subtraction, the addend is inverted
assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
assign inject_carry_in = effective_subtraction & ~sticky_before_add;
// ------
// Adder
// ------
logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry
logic sum_carry; // observe carry bit from sum for sign fixing
logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow
logic final_sign;
//Mantissa adder (ab+c). In normal addition, it cannot overflow.
assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
assign sum_carry = sum_raw[3*PRECISION_BITS+4];
// Complement negative sum (can only happen in subtraction -> overflows for positive results)
assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
// In case of a mispredicted subtraction result, do a sign flip
assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
? 1'b1
: (effective_subtraction ? 1'b0 : tentative_sign);
// ---------------
// Internal pipeline
// ---------------
// Pipeline output signals as non-arrays
logic effective_subtraction_q;
logic signed [EXP_WIDTH-1:0] exponent_product_q;
logic signed [EXP_WIDTH-1:0] exponent_difference_q;
logic signed [EXP_WIDTH-1:0] tentative_exponent_q;
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
logic sticky_before_add_q;
logic [3*PRECISION_BITS+3:0] sum_q;
logic final_sign_q;
fpnew_pkg::roundmode_e rnd_mode_q;
logic result_is_special_q;
fp_t special_result_q;
fpnew_pkg::status_t special_status_q;
// Internal pipeline signals, index i holds signal after i register stages
logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q;
logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q;
fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q;
fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q;
TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
logic [0:NUM_MID_REGS] mid_pipe_mask_q;
AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
logic [0:NUM_MID_REGS] mid_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_MID_REGS] mid_pipe_ready;
// Input stage: First element of pipeline is taken from upstream logic
assign mid_pipe_eff_sub_q[0] = effective_subtraction;
assign mid_pipe_exp_prod_q[0] = exponent_product;
assign mid_pipe_exp_diff_q[0] = exponent_difference;
assign mid_pipe_tent_exp_q[0] = tentative_exponent;
assign mid_pipe_add_shamt_q[0] = addend_shamt;
assign mid_pipe_sticky_q[0] = sticky_before_add;
assign mid_pipe_sum_q[0] = sum;
assign mid_pipe_final_sign_q[0] = final_sign;
assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_res_is_spec_q[0] = result_is_special;
assign mid_pipe_spec_res_q[0] = special_result;
assign mid_pipe_spec_stat_q[0] = special_status;
assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
// Input stage: Propagate pipeline ready signal to input pipe
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
// One elastic register stage per iteration, placed between the adder and the normalization
// logic: index i is the stage input, index i+1 its registered output. With NUM_MID_REGS == 0
// nothing is generated and index 0 is used directly.
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers.
// flush_i is not an input to these payload registers; only the valid bit above is cleared.
`FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0)
`FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0)
`FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0)
`FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0)
`FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
`FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
`FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
`FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
`FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
`FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0)
`FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0)
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0)
`FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS];
assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS];
assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS];
assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS];
assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS];
assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS];
// --------------
// Normalization
// --------------
logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
logic lzc_zeroes; // in case only zeroes found
logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
logic signed [EXP_WIDTH-1:0] normalized_exponent;
logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift
logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit
logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization
logic sticky_after_norm; // sticky bit after normalization
logic signed [EXP_WIDTH-1:0] final_exponent;
assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
// Leading zero counter for cancellations
lzc #(
.WIDTH ( LOWER_SUM_WIDTH ),
.MODE ( 1 ) // MODE = 1 counts leading zeroes
) i_lzc (
// Searches only the lower 2p+3 bits of the registered sum. leading_zero_count and lzc_zeroes
// (set when only zeroes were found) feed the normalization shift amount below.
.in_i ( sum_lower ),
.cnt_o ( leading_zero_count ),
.empty_o ( lzc_zeroes )
);
assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
// Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
always_comb begin : norm_shift_amount
// Selects the left-shift distance (norm_shamt) applied to sum_q and the matching exponent
// (normalized_exponent). A possible remaining 1-bit correction is left to small_norm.
// Product-anchored case or cancellations require LZC
if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
// Normal result (biased exponent > 0 and not a zero)
if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
// Undo initial product shift, remove the counted zeroes
// (p+2 is the zero padding placed above the product in product_shifted)
norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
// Subnormal result
end else begin
// Cap the shift distance to align mantissa with minimum exponent
// (the shift depends on the product exponent only, not on the leading-zero count)
norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
normalized_exponent = 0; // subnormals encoded as 0
end
// Addend-anchored case
end else begin
norm_shamt = addend_shamt_q; // Undo the initial shift
normalized_exponent = tentative_exponent_q;
end
end
// Do the large normalization shift
assign sum_shifted = sum_q << norm_shamt;
// The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
// or right of the (non-carry) MSB of the sum.
always_comb begin : small_norm
// Final 1-bit normalization of sum_shifted. The branches below form a priority chain:
// carry bit set -> shift right, exponent + 1
// sum MSB set -> keep the default assignment
// exponent > 1 -> shift left, exponent - 1
// otherwise -> keep the default mantissa, force the exponent to zero
// Default assignment, discarding carry bit
{final_mantissa, sum_sticky_bits} = sum_shifted;
final_exponent = normalized_exponent;
// The normalized sum has overflown, align right and fix exponent
if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
{final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
final_exponent = normalized_exponent + 1;
// The normalized sum is normal, nothing to do
end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
// do nothing
// The normalized sum is still denormal, align left - unless the result is already subnormal
end else if (normalized_exponent > 1) begin
{final_mantissa, sum_sticky_bits} = sum_shifted << 1;
final_exponent = normalized_exponent - 1;
// Otherwise we're denormal
end else begin
final_exponent = '0;
end
end
// Update the sticky bit with the shifted-out bits
assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
// ----------------------------
// Rounding and classification
// ----------------------------
logic pre_round_sign;
logic [EXP_BITS-1:0] pre_round_exponent;
logic [MAN_BITS-1:0] pre_round_mantissa;
logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
logic [1:0] round_sticky_bits;
logic of_before_round, of_after_round; // overflow
logic uf_before_round, uf_after_round; // underflow
logic result_zero;
logic rounded_sign;
logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
// Classification before round. RISC-V mandates checking underflow AFTER rounding!
assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones
assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0
// Assemble result before rounding. In case of overflow, the largest normal value is set.
assign pre_round_sign = final_sign_q;
assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
assign pre_round_abs = {pre_round_exponent, pre_round_mantissa};
// In case of overflow, the round and sticky bits are set for proper rounding
assign round_sticky_bits = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};
// Perform the rounding
fpnew_rounding #(
.AbsWidth ( EXP_BITS + MAN_BITS )
) i_fpnew_rounding (
.abs_value_i ( pre_round_abs ),
.sign_i ( pre_round_sign ),
.round_sticky_bits_i ( round_sticky_bits ),
.rnd_mode_i ( rnd_mode_q ),
.effective_subtraction_i ( effective_subtraction_q ),
.abs_rounded_o ( rounded_abs ),
.sign_o ( rounded_sign ),
.exact_zero_o ( result_zero )
);
// Classification after rounding
assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
// -----------------
// Result selection
// -----------------
logic [WIDTH-1:0] regular_result;
fpnew_pkg::status_t regular_status;
// Assemble regular result
assign regular_result = {rounded_sign, rounded_abs};
assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
assign regular_status.DZ = 1'b0; // no divisions
assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
// Final results for output pipeline
fp_t result_d;
fpnew_pkg::status_t status_d;
// Select output depending on special case detection
assign result_d = result_is_special_q ? special_result_q : regular_result;
assign status_d = result_is_special_q ? special_status_q : regular_status;
// ----------------
// Output Pipeline
// ----------------
// Output pipeline signals, index i holds signal after i register stages
fp_t [0:NUM_OUT_REGS] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_OUT_REGS] out_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
// Input stage: Propagate pipeline ready signal to inside pipe
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
// Generate the register stages
// Output pipeline: one register stage per loop iteration (NUM_OUT_REGS stages in total).
// Stage i registers the signals at index i into index i+1 of the out_pipe_* arrays.
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// (the last macro argument is presumably the reset value - confirm in common_cells/registers.svh)
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
// Output stage: assign module outputs
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = 1'b1; // always NaN-Box result
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
endmodule

View file

@ -1,839 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
module fpnew_fma_multi #(
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter type AuxType = logic,
// Do not change
localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig),
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands
input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
output logic mask_o,
output AuxType aux_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------
// Constants
// ----------
// The super-format that can hold all formats
localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
// Precision bits 'p' include the implicit bit
localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
// The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
// Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
// datapath leakage. This is either given by the exponent bits or the width of the LZC result.
// In most reasonable FP formats the internal exponent will be wider than the LZC result.
localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
// Shift amount width: maximum internal mantissa size is 3p+4 bits
localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
// Pipelines
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
: 0); // no regs here otherwise
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
: 0); // no regs here otherwise
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? (NumPipeRegs / 3) // Last to get distributed regs
: 0); // no regs here otherwise
// ----------------
// Type definition
// ----------------
typedef struct packed {
logic sign;
logic [SUPER_EXP_BITS-1:0] exponent;
logic [SUPER_MAN_BITS-1:0] mantissa;
} fp_t;
// ---------------
// Input pipeline
// ---------------
// Selected pipeline output signals as non-arrays
logic [2:0][WIDTH-1:0] operands_q;
fpnew_pkg::fp_format_e src_fmt_q;
fpnew_pkg::fp_format_e dst_fmt_q;
// Input pipeline signals, index i holds signal after i register stages
logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign inp_pipe_operands_q[0] = operands_i;
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
assign inp_pipe_op_q[0] = op_i;
assign inp_pipe_op_mod_q[0] = op_mod_i;
assign inp_pipe_src_fmt_q[0] = src_fmt_i;
assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
// Generate the register stages
// Input pipeline: one register stage per loop iteration (NUM_INP_REGS stages in total).
// Stage i registers the signals at index i into index i+1 of the inp_pipe_* arrays.
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// (the last macro argument is presumably the reset value - confirm in common_cells/registers.svh)
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
`FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS];
assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
// -----------------
// Input processing
// -----------------
logic [NUM_FORMATS-1:0][2:0] fmt_sign;
logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;
fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;
// FP Input initialization
// Per-format operand unpacking: for every format index, split the three operands into sign,
// exponent and mantissa fields and classify them. Formats that are not enabled in FpFmtConfig
// are driven with don't-care values instead.
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
if (FpFmtConfig[fmt]) begin : active_format
// Operands cut down to the width of this format (lower FP_WIDTH bits)
logic [2:0][FP_WIDTH-1:0] trimmed_ops;
// Classify input
fpnew_classifier #(
.FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
.NumOperands ( 3 )
) i_fpnew_classifier (
.operands_i ( trimmed_ops ),
.is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
.info_o ( info_q[fmt] )
);
for (genvar op = 0; op < 3; op++) begin : gen_operands
assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0];
// Sign is the top bit of the format-wide operand
assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1];
// Encoded exponent field, zero-extended
assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]});
assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
(SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa
end
end else begin : inactive_format
assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
end
end
fp_t operand_a, operand_b, operand_c;
fpnew_pkg::fp_info_t info_a, info_b, info_c;
// Operation selection and operand adjustment
// | \c op_q | \c op_mod_q | Operation Adjustment
// |:--------:|:-----------:|---------------------
// | FMADD | \c 0 | FMADD: none
// | FMADD | \c 1 | FMSUB: Invert sign of operand C
// | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A
// | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C
// | ADD | \c 0 | ADD: Set operand A to +1.0
// | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C
// | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
// | *others* | \c - | *invalid*
// \note \c op_mod_q always inverts the sign of the addend.
always_comb begin : op_select
  // Default classification: multiplicands come from the source format, addend from the
  // destination format
  info_a = info_q[src_fmt_q][0];
  info_b = info_q[src_fmt_q][1];
  info_c = info_q[dst_fmt_q][2];

  // Default operands - assembled by concatenation, hence packing-order-agnostic
  operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
  operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
  operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};

  // The operation modifier flips the sign of the addend (operand C)
  operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];

  // Operation-specific adjustments
  unique case (inp_pipe_op_q[NUM_INP_REGS])
    // Plain fused multiply-add: operands are used as they are
    fpnew_pkg::FMADD: ;
    // Negated product: flip the sign of operand A
    fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign;
    // Addition: replace the multiplicand A by +1 (biased exponent, zero mantissa)
    fpnew_pkg::ADD: begin
      info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
      operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
    end
    // Multiplication: replace the addend by a zero, -0 in general and +0 when rounding with RDN
    fpnew_pkg::MUL: begin
      info_c    = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
      operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
      if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
        operand_c.sign = 1'b0;
    end
    // Any other operation is invalid here: propagate don't cares
    default: begin
      info_a    = '{default: fpnew_pkg::DONT_CARE};
      info_b    = '{default: fpnew_pkg::DONT_CARE};
      info_c    = '{default: fpnew_pkg::DONT_CARE};
      operand_a = '{default: fpnew_pkg::DONT_CARE};
      operand_b = '{default: fpnew_pkg::DONT_CARE};
      operand_c = '{default: fpnew_pkg::DONT_CARE};
    end
  endcase
end
// ---------------------
// Input classification
// ---------------------
logic any_operand_inf;
logic any_operand_nan;
logic signalling_nan;
logic effective_subtraction;
logic tentative_sign;
// Reduction for special case handling
assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
// Effective subtraction in FMA occurs when product and addend signs differ
assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
// The tentative sign of the FMA shall be the sign of the product
assign tentative_sign = operand_a.sign ^ operand_b.sign;
// ----------------------
// Special case handling
// ----------------------
logic [WIDTH-1:0] special_result;
fpnew_pkg::status_t special_status;
logic result_is_special;
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;
logic [NUM_FORMATS-1:0] fmt_result_is_special;
// Per-format special-case handling: for every enabled format, decide whether the regular FMA
// result is bypassed and build the replacement value (canonical qNaN or signed infinity) plus
// its status flags. Priority: inf*0 product, then any NaN operand, then any infinite operand.
// Disabled formats get a don't-care result, cleared status and a cleared special flag.
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
// Field encodings: all-ones exponent (shared by qNaN and infinity), mantissa with only the
// MSB set (qNaN) and all-zero mantissa (infinity)
localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;
if (FpFmtConfig[fmt]) begin : active_format
always_comb begin : special_results
logic [FP_WIDTH-1:0] special_res;
// Default assignment
special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
fmt_special_status[fmt] = '0;
fmt_result_is_special[fmt] = 1'b0;
// Handle potentially mixed nan & infinity input => important for the case where infinity and
// zero are multiplied and added to a qnan.
// RISC-V mandates raising the NV exception in these cases:
// (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
fmt_special_status[fmt].NV = 1'b1; // invalid operation
// NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
end else if (any_operand_nan) begin
fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
// Special cases involving infinity
end else if (any_operand_inf) begin
fmt_result_is_special[fmt] = 1'b1; // bypass FMA
// Effective addition of opposite infinities (±inf - ±inf) is invalid!
// (special_res keeps its default qNaN value in this case)
if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
fmt_special_status[fmt].NV = 1'b1; // invalid operation
// Handle cases where output will be inf because of inf product input
else if (info_a.is_inf || info_b.is_inf) begin
// Result is infinity with the sign of the product
special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
// Handle cases where the addend is inf
end else if (info_c.is_inf) begin
// Result is infinity with sign of the addend (= operand_c)
special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
end
end
// Initialize special result with ones (NaN-box)
fmt_special_result[fmt] = '1;
// Overwrite the lower FP_WIDTH bits with the format-specific value
fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
end
end else begin : inactive_format
assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
assign fmt_special_status[fmt] = '0;
assign fmt_result_is_special[fmt] = 1'b0;
end
end
// Select the special-case flag according to the destination format
assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
// Special-case status for the destination format; invalid (NV) is the only flag that can be set
assign special_status = fmt_special_status[dst_fmt_q];
// Assemble result according to destination format
assign special_result = fmt_special_result[dst_fmt_q]; // destination format
// ---------------------------
// Initial exponent data path
// ---------------------------
logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
logic signed [EXP_WIDTH-1:0] tentative_exponent;
// Zero-extend exponents into signed container - implicit width extension
assign exponent_a = signed'({1'b0, operand_a.exponent});
assign exponent_b = signed'({1'b0, operand_b.exponent});
assign exponent_c = signed'({1'b0, operand_c.exponent});
// Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx)
// with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
// Biased product exponent is the sum of encoded exponents minus the bias.
assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
: signed'(exponent_a + info_a.is_subnormal
+ exponent_b + info_b.is_subnormal
- 2*signed'(fpnew_pkg::bias(src_fmt_q))
+ signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
// Exponent difference is the addend exponent minus the product exponent
assign exponent_difference = exponent_addend - exponent_product;
// The tentative exponent will be the larger of the product or addend exponent
assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
// Shift amount for addend based on exponents (unsigned as only right shifts)
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
always_comb begin : addend_shift_amount
  // Default: addend-anchored case, saturated shift (product is only in the sticky bit)
  addend_shamt = 0;
  if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin
    // Product-anchored case, saturated shift (addend is only in the sticky bit)
    addend_shamt = 3 * PRECISION_BITS + 4;
  end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin
    // Addend and product overlap: shift distance follows the exponent difference
    addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
  end
end
// ------------------
// Product data path
// ------------------
logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c;
logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide
logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R)
// Add implicit bits to mantissae
assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
// Mantissa multiplier (a*b)
assign product = mantissa_a * mantissa_b;
// Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
// | 000...000 | product | RS |
// <- p+2 -> <- 2p -> < 2>
assign product_shifted = product << 2; // constant shift
// -----------------
// Addend data path
// -----------------
logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on
logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky
logic sticky_before_add; // they are compressed into a single sticky bit
logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R)
logic inject_carry_in; // inject carry for subtractions if needed
// In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
// shifted out and compressed into a sticky bit.
// BEFORE THE SHIFT:
// | mantissa_c | 000..000 |
// <- p -> <- 3p+4 ->
// AFTER THE SHIFT:
// | 000..........000 | mantissa_c | 000...............0GR | sticky bits |
// <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p ->
assign {addend_after_shift, addend_sticky_bits} =
(mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
assign sticky_before_add = (| addend_sticky_bits);
// In case of a subtraction, the addend is inverted
assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
assign inject_carry_in = effective_subtraction & ~sticky_before_add;
// ------
// Adder
// ------
logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry
logic sum_carry; // observe carry bit from sum for sign fixing
logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow
logic final_sign;
//Mantissa adder (ab+c). In normal addition, it cannot overflow.
assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
assign sum_carry = sum_raw[3*PRECISION_BITS+4];
// Complement negative sum (can only happen in subtraction -> overflows for positive results)
assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
// In case of a mispredicted subtraction result, do a sign flip
assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
? 1'b1
: (effective_subtraction ? 1'b0 : tentative_sign);
// ---------------
// Internal pipeline
// ---------------
// Pipeline output signals as non-arrays
logic effective_subtraction_q;
logic signed [EXP_WIDTH-1:0] exponent_product_q;
logic signed [EXP_WIDTH-1:0] exponent_difference_q;
logic signed [EXP_WIDTH-1:0] tentative_exponent_q;
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
logic sticky_before_add_q;
logic [3*PRECISION_BITS+3:0] sum_q;
logic final_sign_q;
fpnew_pkg::fp_format_e dst_fmt_q2;
fpnew_pkg::roundmode_e rnd_mode_q;
logic result_is_special_q;
fp_t special_result_q;
fpnew_pkg::status_t special_status_q;
// Internal pipeline signals, index i holds signal after i register stages
logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q;
logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q;
logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q;
fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q;
fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q;
TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
logic [0:NUM_MID_REGS] mid_pipe_mask_q;
AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
logic [0:NUM_MID_REGS] mid_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_MID_REGS] mid_pipe_ready;
// Input stage: First element of pipeline is taken from upstream logic
assign mid_pipe_eff_sub_q[0] = effective_subtraction;
assign mid_pipe_exp_prod_q[0] = exponent_product;
assign mid_pipe_exp_diff_q[0] = exponent_difference;
assign mid_pipe_tent_exp_q[0] = tentative_exponent;
assign mid_pipe_add_shamt_q[0] = addend_shamt;
assign mid_pipe_sticky_q[0] = sticky_before_add;
assign mid_pipe_sum_q[0] = sum;
assign mid_pipe_final_sign_q[0] = final_sign;
assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
assign mid_pipe_res_is_spec_q[0] = result_is_special;
assign mid_pipe_spec_res_q[0] = special_result;
assign mid_pipe_spec_stat_q[0] = special_status;
assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
// Input stage: Propagate pipeline ready signal to input pipe
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
// Generate the register stages
// Internal pipeline: one register stage per loop iteration (NUM_MID_REGS stages in total).
// Stage i registers the signals at index i into index i+1 of the mid_pipe_* arrays.
for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
// (the last macro argument is presumably the reset value - confirm in common_cells/registers.svh)
`FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0)
`FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0)
`FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0)
`FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0)
`FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
`FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
`FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
`FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
`FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
`FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
`FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0)
`FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0)
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0)
`FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS];
assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS];
assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS];
assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS];
assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS];
assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS];
// --------------
// Normalization
// --------------
logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
logic lzc_zeroes; // in case only zeroes found
logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
logic signed [EXP_WIDTH-1:0] normalized_exponent;
logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift
logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit
logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization
logic sticky_after_norm; // sticky bit after normalization
logic signed [EXP_WIDTH-1:0] final_exponent;
assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
// Leading zero counter for cancellations
lzc #(
.WIDTH ( LOWER_SUM_WIDTH ),
.MODE ( 1 ) // MODE = 1 counts leading zeroes
) i_lzc (
.in_i ( sum_lower ),
.cnt_o ( leading_zero_count ),
.empty_o ( lzc_zeroes )
);
assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
// Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
always_comb begin : norm_shift_amount
// Product-anchored case or cancellations require LZC
if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
// Normal result (biased exponent > 0 and not a zero)
if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
// Undo initial product shift, remove the counted zeroes
norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
// Subnormal result
end else begin
// Cap the shift distance to align mantissa with minimum exponent
norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
normalized_exponent = 0; // subnormals encoded as 0
end
// Addend-anchored case
end else begin
norm_shamt = addend_shamt_q; // Undo the initial shift
normalized_exponent = tentative_exponent_q;
end
end
// Do the large normalization shift
assign sum_shifted = sum_q << norm_shamt;
// The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
// or right of the (non-carry) MSB of the sum.
always_comb begin : small_norm
// Default assignment, discarding carry bit
{final_mantissa, sum_sticky_bits} = sum_shifted;
final_exponent = normalized_exponent;
// The normalized sum has overflown, align right and fix exponent
if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
{final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
final_exponent = normalized_exponent + 1;
// The normalized sum is normal, nothing to do
end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
// do nothing
// The normalized sum is still denormal, align left - unless the result is already subnormal
end else if (normalized_exponent > 1) begin
{final_mantissa, sum_sticky_bits} = sum_shifted << 1;
final_exponent = normalized_exponent - 1;
// Otherwise we're denormal
end else begin
final_exponent = '0;
end
end
// Update the sticky bit with the shifted-out bits
assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
// ----------------------------
// Rounding and classification
// ----------------------------
logic pre_round_sign;
logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
logic [1:0] round_sticky_bits;
logic of_before_round, of_after_round; // overflow
logic uf_before_round, uf_after_round; // underflow
logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits;
logic [NUM_FORMATS-1:0] fmt_of_after_round;
logic [NUM_FORMATS-1:0] fmt_uf_after_round;
logic rounded_sign;
logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
logic result_zero;
// Classification before round. RISC-V mandates checking underflow AFTER rounding!
assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0
// Pack exponent and mantissa into proper rounding form
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
// Set up some constants
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
logic [EXP_BITS-1:0] pre_round_exponent;
logic [MAN_BITS-1:0] pre_round_mantissa;
if (FpFmtConfig[fmt]) begin : active_format
assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
// Assemble result before rounding. In case of overflow, the largest normal value is set.
assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
// Round bit is after mantissa (1 in case of overflow for rounding)
assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
of_before_round;
// remaining bits in mantissa to sticky (1 in case of overflow for rounding)
if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
sticky_after_norm | of_before_round;
end else begin : normal_sticky
assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
end
end else begin : inactive_format
assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Assemble result before rounding. In case of overflow, the largest normal value is set.
assign pre_round_sign = final_sign_q;
assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2];
// In case of overflow, the round and sticky bits are set for proper rounding
assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2];
// Perform the rounding
fpnew_rounding #(
.AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
) i_fpnew_rounding (
.abs_value_i ( pre_round_abs ),
.sign_i ( pre_round_sign ),
.round_sticky_bits_i ( round_sticky_bits ),
.rnd_mode_i ( rnd_mode_q ),
.effective_subtraction_i ( effective_subtraction_q ),
.abs_rounded_o ( rounded_abs ),
.sign_o ( rounded_sign ),
.exact_zero_o ( result_zero )
);
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
if (FpFmtConfig[fmt]) begin : active_format
always_comb begin : post_process
// detect of / uf
fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
// Assemble regular result, nan box short ones.
fmt_result[fmt] = '1;
fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
end
end else begin : inactive_format
assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
end
end
// Classification after rounding select by destination format
assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
assign of_after_round = fmt_of_after_round[dst_fmt_q2];
// -----------------
// Result selection
// -----------------
logic [WIDTH-1:0] regular_result;
fpnew_pkg::status_t regular_status;
// Assemble regular result
assign regular_result = fmt_result[dst_fmt_q2];
assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
assign regular_status.DZ = 1'b0; // no divisions
assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
// Final results for output pipeline
logic [WIDTH-1:0] result_d;
fpnew_pkg::status_t status_d;
// Select output depending on special case detection
assign result_d = result_is_special_q ? special_result_q : regular_result;
assign status_d = result_is_special_q ? special_status_q : regular_status;
// ----------------
// Output Pipeline
// ----------------
// Output pipeline signals, index i holds signal after i register stages
logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_OUT_REGS] out_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
// Input stage: Propagate pipeline ready signal to inside pipe
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
// Output stage: assign module outputs
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = 1'b1; // always NaN-Box result
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
endmodule

View file

@ -1,415 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
// Non-computational floating-point operations for one FP format.
//
// Depending on op_i the module performs sign injection (SGNJ), minimum/maximum (MINMAX),
// comparison (CMP) or classification (CLASSIFY) on operands_i. The variant of each
// operation is selected through rnd_mode_i and, for some of them, op_mod_i; see the
// section comments below. The operation logic itself is combinational; NumPipeRegs
// register stages are placed in front of and/or behind it as selected by PipeConfig.
//
// Parameters:
//   FpFormat    - FP format handled by this instance (sets EXP_BITS, MAN_BITS and WIDTH)
//   NumPipeRegs - total number of pipeline register stages
//   PipeConfig  - register placement: BEFORE and INSIDE put all stages at the input,
//                 AFTER puts all stages at the output, DISTRIBUTED splits them with the
//                 input side receiving the extra stage for odd counts; any other value
//                 yields no registers
//   TagType     - type of the tag that is passed through alongside the operation
//   AuxType     - type of the auxiliary data that is passed through alongside the operation
module fpnew_noncomp #(
parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter type AuxType = logic,
localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [1:0][WIDTH-1:0] operands_i, // 2 operands
input logic [1:0] is_boxed_i, // 2 operands
// rnd_mode_i does not select a rounding mode here; it encodes the sub-operation
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
// op_mod_i: inverts CMP results, enables sign-extension of SGNJ results
input logic op_mod_i,
// tag_i, mask_i and aux_i are only registered and forwarded to the matching outputs
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
// flush_i clears the valid bits of all pipeline stages
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
// Classification result; only meaningful when is_class_o is set
output fpnew_pkg::classmask_e class_mask_o,
output logic is_class_o,
output TagType tag_o,
output logic mask_o,
output AuxType aux_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------
// Constants
// ----------
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
// Pipelines
// Number of register stages in front of the operation logic
localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? ((NumPipeRegs + 1) / 2) // First to get distributed regs
: 0); // no regs here otherwise
// Number of register stages behind the operation logic
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
? NumPipeRegs
: (PipeConfig == fpnew_pkg::DISTRIBUTED
? (NumPipeRegs / 2) // Last to get distributed regs
: 0); // no regs here otherwise
// ----------------
// Type definition
// ----------------
// Field view of one operand; as a packed struct, sign occupies the most significant bit.
// NOTE(review): assigning WIDTH-bit operands to fp_t assumes
// fp_width == 1 + exp_bits + man_bits - confirm in fpnew_pkg.
typedef struct packed {
logic sign;
logic [EXP_BITS-1:0] exponent;
logic [MAN_BITS-1:0] mantissa;
} fp_t;
// ---------------
// Input pipeline
// ---------------
// Input pipeline signals, index i holds signal after i register stages
logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q;
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign inp_pipe_operands_q[0] = operands_i;
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
assign inp_pipe_op_q[0] = op_i;
assign inp_pipe_op_mod_q[0] = op_mod_i;
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
// Generate the register stages
// NOTE(review): FFL and FFLARNC come from common_cells/registers.svh, which is not visible
// here; the argument roles described below follow the existing comments - confirm there.
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// ---------------------
// Input classification
// ---------------------
// All logic below operates on the last input pipeline stage (index NUM_INP_REGS)
fpnew_pkg::fp_info_t [1:0] info_q;
// Classify input
fpnew_classifier #(
.FpFormat ( FpFormat ),
.NumOperands ( 2 )
) i_class_a (
.operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
.is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
.info_o ( info_q )
);
fp_t operand_a, operand_b;
fpnew_pkg::fp_info_t info_a, info_b;
// Packing-order-agnostic assignments
assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
assign info_a = info_q[0];
assign info_b = info_q[1];
logic any_operand_inf;
logic any_operand_nan;
logic signalling_nan;
// Reduction for special case handling
assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf});
assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan});
assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling});
logic operands_equal, operand_a_smaller;
// Equality checks for zeroes too
// (bit-identical operands, or both classified as zero regardless of sign)
assign operands_equal = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
// Invert result if non-zero signs involved (unsigned comparison)
// The raw bit patterns are compared as unsigned numbers and the outcome is flipped as soon
// as either sign bit is set. For equal operands with the sign bit set this yields 1, so
// users that need a strict ordering additionally qualify it with operands_equal.
assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
// ---------------
// Sign Injection
// ---------------
fp_t sgnj_result;
fpnew_pkg::status_t sgnj_status;
logic sgnj_extension_bit;
// Sign Injection - operation is encoded in rnd_mode_q:
// RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
always_comb begin : sign_injections
logic sign_a, sign_b; // internal signs
// Default assignment
sgnj_result = operand_a; // result based on operand a
// NaN-boxing check will treat invalid inputs as canonical NaNs
if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
// Internal signs are treated as positive in case of non-NaN-boxed values
sign_a = operand_a.sign & info_a.is_boxed;
sign_b = operand_b.sign & info_b.is_boxed;
// Do the sign injection based on rm field
// Only the sign field is replaced for SGNJ/SGNJN/SGNJX; the passthrough case overwrites
// the whole result and thereby also undoes the canonical-NaN substitution above.
unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
fpnew_pkg::RNE: sgnj_result.sign = sign_b; // SGNJ
fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN
fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
fpnew_pkg::RUP: sgnj_result = operand_a; // passthrough
default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
endcase
end
assign sgnj_status = '0; // sign injections never raise exceptions
// op_mod_q enables integer sign-extension of result (for storing to integer regfile)
assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1;
// ------------------
// Minimum / Maximum
// ------------------
fp_t minmax_result;
fpnew_pkg::status_t minmax_status;
logic minmax_extension_bit;
// Minimum/Maximum - operation is encoded in rnd_mode_q:
// RNE = MIN, RTZ = MAX
always_comb begin : min_max
// Default assignment
minmax_status = '0;
// Min/Max use quiet comparisons - only sNaN are invalid
minmax_status.NV = signalling_nan;
// Both NaN inputs cause a NaN output
if (info_a.is_nan && info_b.is_nan)
minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN
// If one operand is NaN, the non-NaN operand is returned
else if (info_a.is_nan) minmax_result = operand_b;
else if (info_b.is_nan) minmax_result = operand_a;
// Otherwise decide according to the operation
else begin
unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
endcase
end
end
assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value
// ------------
// Comparisons
// ------------
fp_t cmp_result;
fpnew_pkg::status_t cmp_status;
logic cmp_extension_bit;
// Comparisons - operation is encoded in rnd_mode_q:
// RNE = LE, RTZ = LT, RDN = EQ
// op_mod_q inverts boolean outputs
// Whenever NV is raised below, cmp_result keeps its default of '0 and is NOT inverted by
// op_mod_q; only the quiet-NaN case of EQ returns op_mod_q itself.
always_comb begin : comparisons
// Default assignment
cmp_result = '0; // false
cmp_status = '0; // no flags
// Signalling NaNs always compare as false and are illegal
if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation
// Otherwise do comparisons
else begin
unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
fpnew_pkg::RNE: begin // Less than or equal
if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
end
fpnew_pkg::RTZ: begin // Less than
if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid
else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS];
end
fpnew_pkg::RDN: begin // Equal
if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal
else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS];
end
default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
endcase
end
end
assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers
// ---------------
// Classification
// ---------------
fpnew_pkg::status_t class_status;
logic class_extension_bit;
fpnew_pkg::classmask_e class_mask_d; // the result is actually here
// Classification - always return the classification mask on the dedicated port
// Only operand a is classified; the checks form a priority chain, first match wins.
always_comb begin : classify
if (info_a.is_normal) begin
class_mask_d = operand_a.sign ? fpnew_pkg::NEGNORM : fpnew_pkg::POSNORM;
end else if (info_a.is_subnormal) begin
class_mask_d = operand_a.sign ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
end else if (info_a.is_zero) begin
class_mask_d = operand_a.sign ? fpnew_pkg::NEGZERO : fpnew_pkg::POSZERO;
end else if (info_a.is_inf) begin
class_mask_d = operand_a.sign ? fpnew_pkg::NEGINF : fpnew_pkg::POSINF;
end else if (info_a.is_nan) begin
class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN : fpnew_pkg::QNAN;
end else begin
class_mask_d = fpnew_pkg::QNAN; // default value
end
end
assign class_status = '0; // classification does not set flags
assign class_extension_bit = 1'b0; // classification always produces results in integer registers
// -----------------
// Result selection
// -----------------
fp_t result_d;
fpnew_pkg::status_t status_d;
logic extension_bit_d;
logic is_class_d;
// Select result
// Picks result, status and extension bit of the operation selected by op. For CLASSIFY
// result_d is left as don't care because the mask is delivered through class_mask_d.
always_comb begin : select_result
unique case (inp_pipe_op_q[NUM_INP_REGS])
fpnew_pkg::SGNJ: begin
result_d = sgnj_result;
status_d = sgnj_status;
extension_bit_d = sgnj_extension_bit;
end
fpnew_pkg::MINMAX: begin
result_d = minmax_result;
status_d = minmax_status;
extension_bit_d = minmax_extension_bit;
end
fpnew_pkg::CMP: begin
result_d = cmp_result;
status_d = cmp_status;
extension_bit_d = cmp_extension_bit;
end
fpnew_pkg::CLASSIFY: begin
result_d = '{default: fpnew_pkg::DONT_CARE}; // unused
status_d = class_status;
extension_bit_d = class_extension_bit;
end
default: begin
result_d = '{default: fpnew_pkg::DONT_CARE}; // dont care
status_d = '{default: fpnew_pkg::DONT_CARE}; // dont care
extension_bit_d = fpnew_pkg::DONT_CARE; // dont care
end
endcase
end
// Flags classification results so that consumers know to read class_mask_o
assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY);
// ----------------
// Output Pipeline
// ----------------
// Output pipeline signals, index i holds signal after i register stages
fp_t [0:NUM_OUT_REGS] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
logic [0:NUM_OUT_REGS] out_pipe_is_class_q;
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_OUT_REGS] out_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_extension_bit_q[0] = extension_bit_d;
assign out_pipe_class_mask_q[0] = class_mask_d;
assign out_pipe_is_class_q[0] = is_class_d;
assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
// Input stage: Propagate pipeline ready signal to inside pipe
assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];
// Generate the register stages
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
// Generate the pipeline registers within the stages, use enable-registers
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
`FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
`FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN)
`FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0)
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
// Output stage: assign module outputs
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS];
assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS];
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
// Busy while any pipeline position holds valid data. Index 0 of the input pipe is
// in_valid_i itself, so busy_o is also asserted combinationally by a pending input.
assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q});
endmodule

View file

@ -1,244 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
module fpnew_opgroup_block #(
parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
// FPU configuration
parameter int unsigned Width = 32,
parameter logic EnableVectors = 1'b1,
parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1,
parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1,
parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0},
parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL},
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter int unsigned TrueSIMDClass = 0,
// Do not change
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS,
localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors),
localparam type MaskType = logic [NUM_LANES-1:0]
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input fpnew_pkg::int_format_e int_fmt_i,
input logic vectorial_op_i,
input TagType tag_i,
input MaskType simd_mask_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [Width-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// ----------------
// Type Definition
// ----------------
typedef struct packed {
logic [Width-1:0] result;
fpnew_pkg::status_t status;
logic ext_bit;
TagType tag;
} output_t;
// Handshake signals for the slices
logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
output_t [NUM_FORMATS-1:0] fmt_outputs;
// -----------
// Input Side
// -----------
assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready is given by selected format
// -------------------------
// Generate Parallel Slices
// -------------------------
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
// Some constants for this format
localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
localparam logic IS_FIRST_MERGED =
fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);
// Generate slice only if format enabled
if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format
logic in_valid;
assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format
// Forward masks related to the right SIMD lane
localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors);
logic [INTERNAL_LANES-1:0] mask_slice;
always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b];
fpnew_opgroup_fmt_slice #(
.OpGroup ( OpGroup ),
.FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
.Width ( Width ),
.EnableVectors ( EnableVectors ),
.NumPipeRegs ( FmtPipeRegs[fmt] ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.TrueSIMDClass ( TrueSIMDClass )
) i_fmt_slice (
.clk_i,
.rst_ni,
.operands_i ( operands_i ),
.is_boxed_i ( is_boxed_i[fmt] ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.vectorial_op_i,
.tag_i,
.simd_mask_i ( mask_slice ),
.in_valid_i ( in_valid ),
.in_ready_o ( fmt_in_ready[fmt] ),
.flush_i,
.result_o ( fmt_outputs[fmt].result ),
.status_o ( fmt_outputs[fmt].status ),
.extension_bit_o( fmt_outputs[fmt].ext_bit ),
.tag_o ( fmt_outputs[fmt].tag ),
.out_valid_o ( fmt_out_valid[fmt] ),
.out_ready_i ( fmt_out_ready[fmt] ),
.busy_o ( fmt_busy[fmt] )
);
// If the format wants to use merged ops, tie off the dangling ones not used here
end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused
localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
// Ready is split up into formats
assign fmt_in_ready[fmt] = fmt_in_ready[int'(FMT)];
assign fmt_out_valid[fmt] = 1'b0; // don't emit values
assign fmt_busy[fmt] = 1'b0; // never busy
// Outputs are don't care
assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
// Tie off disabled formats
end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
assign fmt_in_ready[fmt] = 1'b0; // don't accept operations
assign fmt_out_valid[fmt] = 1'b0; // don't emit values
assign fmt_busy[fmt] = 1'b0; // never busy
// Outputs are don't care
assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE};
assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE};
assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE);
end
end
// ----------------------
// Generate Merged Slice
// ----------------------
if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice
localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);
logic in_valid;
assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED);
fpnew_opgroup_multifmt_slice #(
.OpGroup ( OpGroup ),
.Width ( Width ),
.FpFmtConfig ( FpFmtMask ),
.IntFmtConfig ( IntFmtMask ),
.EnableVectors ( EnableVectors ),
.NumPipeRegs ( REG ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType )
) i_multifmt_slice (
.clk_i,
.rst_ni,
.operands_i,
.is_boxed_i,
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.int_fmt_i,
.vectorial_op_i,
.tag_i,
.simd_mask_i ( simd_mask_i ),
.in_valid_i ( in_valid ),
.in_ready_o ( fmt_in_ready[FMT] ),
.flush_i,
.result_o ( fmt_outputs[FMT].result ),
.status_o ( fmt_outputs[FMT].status ),
.extension_bit_o ( fmt_outputs[FMT].ext_bit ),
.tag_o ( fmt_outputs[FMT].tag ),
.out_valid_o ( fmt_out_valid[FMT] ),
.out_ready_i ( fmt_out_ready[FMT] ),
.busy_o ( fmt_busy[FMT] )
);
end
// ------------------
// Arbitrate Outputs
// ------------------
output_t arbiter_output;
// Round-Robin arbiter to decide which result to use
rr_arb_tree #(
.NumIn ( NUM_FORMATS ),
.DataType ( output_t ),
.AxiVldRdy ( 1'b1 )
) i_arbiter (
.clk_i,
.rst_ni,
.flush_i,
.rr_i ( '0 ),
.req_i ( fmt_out_valid ),
.gnt_o ( fmt_out_ready ),
.data_i ( fmt_outputs ),
.gnt_i ( out_ready_i ),
.req_o ( out_valid_o ),
.data_o ( arbiter_output ),
.idx_o ( /* unused */ )
);
// Unpack output
assign result_o = arbiter_output.result;
assign status_o = arbiter_output.status;
assign extension_bit_o = arbiter_output.ext_bit;
assign tag_o = arbiter_output.tag;
assign busy_o = (| fmt_busy);
endmodule

View file

@ -1,292 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
// fpnew_opgroup_fmt_slice: single-format slice of one operation group.
//
// For the format selected by `FpFormat`, this module generates one operation unit per SIMD lane
// (fpnew_fma for OpGroup == ADDMUL, fpnew_noncomp for OpGroup == NONCOMP), cuts the per-lane
// operands out of `operands_i`, and reassembles the lane results into the `Width`-bit `result_o`.
// Handshake (`in_ready_o`, `out_valid_o`), tag and extension bit are taken from lane 0 only;
// `busy_o` is the OR of all lane busy flags and `status_o` is the OR of all lane status flags,
// each gated by that lane's output mask bit.
//
// Parameters:
//   OpGroup       - operation group implemented by the lanes
//   FpFormat      - floating-point format handled by this slice
//   Width         - width in bits of the operand/result datapath
//   EnableVectors - when 0, `vectorial_op_i` is ignored and only lane 0 is instantiated
//   NumPipeRegs   - forwarded to the lane units
//   PipeConfig    - forwarded to the lane units
//   TagType       - type of the tag passed through the lane units
//   TrueSIMDClass - when non-zero (and lanes are at least 10 bit wide), vectorial classify
//                   results use 10-bit class blocks per lane instead of 8-bit blocks
module fpnew_opgroup_fmt_slice #(
parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL,
parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
// FPU configuration
parameter int unsigned Width = 32,
parameter logic EnableVectors = 1'b1,
parameter int unsigned NumPipeRegs = 0,
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
parameter type TagType = logic,
parameter int unsigned TrueSIMDClass = 0,
// Do not change
localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors),
localparam type MaskType = logic [NUM_LANES-1:0]
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
input logic [NUM_OPERANDS-1:0] is_boxed_i,
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input logic vectorial_op_i,
input TagType tag_i,
input MaskType simd_mask_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [Width-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
// Width of one element of the selected format, and width of the datapath share of one lane
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat);
localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES);
logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
logic vectorial_op;
// Concatenation of all lane results (NUM_LANES elements of FP_WIDTH bits)
logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result;
fpnew_pkg::status_t [NUM_LANES-1:0] lane_status;
logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
logic [NUM_LANES-1:0] lane_masks;
// lane_vectorial / lane_is_class: only lane 0 is read; lane_busy is OR-reduced over all lanes
logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class;
logic result_is_vector, result_is_class;
// -----------
// Input Side
// -----------
assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled
// ---------------
// Generate Lanes
// ---------------
for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
logic [FP_WIDTH-1:0] local_result; // lane-local results
logic local_sign;
// Generate instances only if needed, lane 0 always generated
if ((lane == 0) || EnableVectors) begin : active_lane
logic in_valid, out_valid, out_ready; // lane-local handshake
logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
logic [FP_WIDTH-1:0] op_result; // lane-local results
fpnew_pkg::status_t op_status;
assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
// Slice out the operands for this lane
always_comb begin : prepare_input
for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
end
end
// Instantiate the operation from the selected opgroup
if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
fpnew_fma #(
.FpFormat ( FpFormat ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic )
) i_fma (
.clk_i,
.rst_ni,
.operands_i ( local_operands ),
.is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( vectorial_op ), // Remember whether operation was vectorial
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_vectorial[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] )
);
// The FMA unit has no classify output: never flag a class result, tie the mask to a constant
assign lane_is_class[lane] = 1'b0;
assign lane_class_mask[lane] = fpnew_pkg::NEGINF;
end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
// NOTE(review): the single-format DIVSQRT unit below is commented out, so for
// OpGroup == DIVSQRT this branch drives none of op_result, op_status, lane_in_ready,
// lane_busy, out_valid, etc. Presumably DIVSQRT is only ever built through the
// multi-format slice - confirm before instantiating this module with DIVSQRT.
// fpnew_divsqrt #(
// .FpFormat (FpFormat),
// .NumPipeRegs(NumPipeRegs),
// .PipeConfig (PipeConfig),
// .TagType (TagType),
// .AuxType (logic)
// ) i_divsqrt (
// .clk_i,
// .rst_ni,
// .operands_i ( local_operands ),
// .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
// .rnd_mode_i,
// .op_i,
// .op_mod_i,
// .tag_i,
// .aux_i ( vectorial_op ), // Remember whether operation was vectorial
// .in_valid_i ( in_valid ),
// .in_ready_o ( lane_in_ready[lane] ),
// .flush_i,
// .result_o ( op_result ),
// .status_o ( op_status ),
// .extension_bit_o ( lane_ext_bit[lane] ),
// .tag_o ( lane_tags[lane] ),
// .aux_o ( lane_vectorial[lane] ),
// .out_valid_o ( out_valid ),
// .out_ready_i ( out_ready ),
// .busy_o ( lane_busy[lane] )
// );
// assign lane_is_class[lane] = 1'b0;
end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
// The non-computational unit drives the class mask and the is_class flag itself
fpnew_noncomp #(
.FpFormat (FpFormat),
.NumPipeRegs(NumPipeRegs),
.PipeConfig (PipeConfig),
.TagType (TagType),
.AuxType (logic)
) i_noncomp (
.clk_i,
.rst_ni,
.operands_i ( local_operands ),
.is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( vectorial_op ), // Remember whether operation was vectorial
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.class_mask_o ( lane_class_mask[lane] ),
.is_class_o ( lane_is_class[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_vectorial[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] )
);
end // ADD OTHER OPTIONS HERE
// Handshakes are only done if the lane is actually used
assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
// Properly NaN-box or sign-extend the slice result if not in use
assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;
// Otherwise generate constant sign-extension
end else begin
// NOTE(review): this branch leaves lane_masks, lane_class_mask, lane_tags, lane_ext_bit and
// lane_vectorial of the lane undriven; lane_status is forced to zero here.
assign lane_out_valid[lane] = 1'b0; // unused lane
assign lane_in_ready[lane] = 1'b0; // unused lane
assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
assign lane_status[lane] = '0;
assign lane_busy[lane] = 1'b0;
assign lane_is_class[lane] = 1'b0;
end
// Insert lane result into slice result
assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;
// Create Classification results
if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size
// Class mask in the low 10 bits of the lane's share, remaining upper bits of the share zeroed
assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane];
assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0;
end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
// Only lanes whose 8-bit block still fits into Width get a block
assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
lane_class_mask[lane] == fpnew_pkg::NEGZERO);
// Write the current block segment
assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
local_sign, // BIT 7
~local_sign, // BIT 6
lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
lane_class_mask[lane] == fpnew_pkg::POSZERO
|| lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
|| lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
lane_class_mask[lane] == fpnew_pkg::POSNORM
|| lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
lane_class_mask[lane] == fpnew_pkg::POSINF
|| lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
};
end
end
// ------------
// Output Side
// ------------
// Lane 0 decides whether the result is vectorial and whether it is a classify result
assign result_is_vector = lane_vectorial[0];
assign result_is_class = lane_is_class[0];
// Extend the concatenated lane results to Width using the extension bit
assign slice_regular_result = $signed({extension_bit_o, slice_result});
// Number of result bits covered by 8-bit class blocks, capped at the largest multiple of 8 in Width
localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;
// Pad out unused vec_class bits if each classify result is on 8 bits
if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin
if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
end
end
// localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;
// Scalar classify results return the class mask of lane 0 directly
assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];
// Select the proper result
assign result_o = result_is_class ? slice_class_result : slice_regular_result;
assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused
assign tag_o = lane_tags[0]; // upper lanes unused
assign busy_o = (| lane_busy);
assign out_valid_o = lane_out_valid[0]; // upper lanes unused
// Collapse the lane status
always_comb begin : output_processing
// Collapse the status: OR the flags of all lanes, each gated by the lane's output mask bit
automatic fpnew_pkg::status_t temp_status;
temp_status = '0;
for (int i = 0; i < int'(NUM_LANES); i++)
temp_status |= lane_status[i] & {5{lane_masks[i]}};
status_o = temp_status;
end
endmodule

View file

@ -1,449 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
`include "common_cells/registers.svh"
// fpnew_opgroup_multifmt_slice: multi-format slice of one operation group.
//
// Generates one multi-format operation unit per SIMD lane (fpnew_fma_multi for ADDMUL,
// fpnew_divsqrt_multi for DIVSQRT, fpnew_cast_multi for CONV), shifts the per-lane operands out
// of `operands_i` according to the source format, and packs the lane results once per FP format
// and once per INT format. The format that travelled with the operation through lane 0 selects
// which packed vector is driven onto `result_o`.
// Handshake (`in_ready_o`, `out_valid_o`), tag and extension bit are taken from lane 0 only;
// `busy_o` is the OR of all lane busy flags and `status_o` is the OR of all lane status flags,
// each gated by that lane's output mask bit.
//
// Parameters:
//   OpGroup       - operation group implemented by the lanes
//   Width         - width in bits of the operand/result datapath
//   FpFmtConfig   - mask of enabled FP formats
//   IntFmtConfig  - mask of enabled INT formats (used for CONV)
//   EnableVectors - when 0, `vectorial_op_i` is ignored and only lane 0 is instantiated
//   NumPipeRegs   - forwarded to the lane units; also the depth of the CONV bypass pipeline
//   PipeConfig    - forwarded to the lane units
//   TagType       - type of the tag passed through the lane units
module fpnew_opgroup_multifmt_slice #(
  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::CONV,
  parameter int unsigned             Width         = 64,
  // FPU configuration
  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig   = '1,
  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig  = '1,
  parameter logic                    EnableVectors = 1'b1,
  parameter int unsigned             NumPipeRegs   = 0,
  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
  parameter type                     TagType       = logic,
  // Do not change
  localparam int unsigned NUM_OPERANDS   = fpnew_pkg::num_operands(OpGroup),
  localparam int unsigned NUM_FORMATS    = fpnew_pkg::NUM_FP_FORMATS,
  localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors),
  localparam type         MaskType       = logic [NUM_SIMD_LANES-1:0]
) (
  input  logic                                     clk_i,
  input  logic                                     rst_ni,
  // Input signals
  input  logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
  input  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
  input  fpnew_pkg::roundmode_e                    rnd_mode_i,
  input  fpnew_pkg::operation_e                    op_i,
  input  logic                                     op_mod_i,
  input  fpnew_pkg::fp_format_e                    src_fmt_i,
  input  fpnew_pkg::fp_format_e                    dst_fmt_i,
  input  fpnew_pkg::int_format_e                   int_fmt_i,
  input  logic                                     vectorial_op_i,
  input  TagType                                   tag_i,
  input  MaskType                                  simd_mask_i,
  // Input Handshake
  input  logic                                     in_valid_i,
  output logic                                     in_ready_o,
  input  logic                                     flush_i,
  // Output signals
  output logic [Width-1:0]                         result_o,
  output fpnew_pkg::status_t                       status_o,
  output logic                                     extension_bit_o,
  output TagType                                   tag_o,
  // Output handshake
  output logic                                     out_valid_o,
  input  logic                                     out_ready_i,
  // Indication of valid data in flight
  output logic                                     busy_o
);

  localparam int unsigned MAX_FP_WIDTH    = fpnew_pkg::max_fp_width(FpFmtConfig);
  localparam int unsigned MAX_INT_WIDTH   = fpnew_pkg::max_int_width(IntFmtConfig);
  // Lane count is computed with vectors forced on; lanes above 0 become inactive_lane
  // branches when EnableVectors is 0.
  localparam int unsigned NUM_LANES       = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
  // We will send the format information along with the data
  localparam int unsigned FMT_BITS =
      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags

  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes
  logic                 vectorial_op;
  logic [FMT_BITS-1:0]  dst_fmt; // destination format to pass along with operation
  logic [AUX_BITS-1:0]  aux_data;

  // additional flags for CONV
  logic       dst_fmt_is_int, dst_is_cpk;
  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
  logic [2:0] target_aux_d, target_aux_q;
  logic       is_up_cast, is_down_cast;

  logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
  logic [Width-1:0]                      conv_slice_result;

  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register

  fpnew_pkg::status_t [NUM_LANES-1:0]   lane_status;
  logic [NUM_LANES-1:0]                 lane_ext_bit; // only the first one is actually used
  TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
  logic [NUM_LANES-1:0]                 lane_masks;
  logic [NUM_LANES-1:0][AUX_BITS-1:0]   lane_aux; // only the first one is actually used
  logic [NUM_LANES-1:0]                 lane_busy; // OR-reduced over all lanes for busy_o

  logic                result_is_vector;
  logic [FMT_BITS-1:0] result_fmt;
  logic                result_fmt_is_int, result_is_cpk;
  logic [1:0]          result_vec_op; // info for vectorial results (for packing)

  logic simd_synch_rdy, simd_synch_done;

  // -----------
  // Input Side
  // -----------
  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled

  // Cast-and-Pack ops are encoded in operation and modifier
  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
  assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
                                                          op_i == fpnew_pkg::CPKCD);
  // Gate BOTH bits with the opgroup check. ANDing the 1-bit comparison directly with the 2-bit
  // concatenation zero-extends the comparison to 2'b01, which always cleared the upper (CPKCD)
  // bit of dst_vec_op.
  assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV)
                          ? {(op_i == fpnew_pkg::CPKCD), op_mod_i}
                          : 2'b00;

  assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));

  // The destination format is the int format for F2I casts
  assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;

  // The data sent along consists of the vectorial flag and format bits
  assign aux_data     = {dst_fmt_is_int, vectorial_op, dst_fmt};
  assign target_aux_d = {dst_vec_op, dst_is_cpk};

  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
  end

  // For 2-operand units, prepare boxing info
  logic [NUM_FORMATS-1:0]      is_boxed_1op;
  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;

  always_comb begin : boxed_2op
    for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin
      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
    end
  end

  // ---------------
  // Generate Lanes
  // ---------------
  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
    // Get a mask of active formats for this lane
    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);

    // Cast-specific parameters
    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);

    // Lane parameters from Opgroup
    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;

    logic [LANE_WIDTH-1:0] local_result; // lane-local results

    // Generate instances only if needed, lane 0 always generated
    if ((lane == 0) || EnableVectors) begin : active_lane
      logic in_valid, out_valid, out_ready; // lane-local handshake

      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands
      logic [LANE_WIDTH-1:0]                   op_result;      // lane-local results
      fpnew_pkg::status_t                      op_status;

      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors

      // Slice out the operands for this lane, upper bits are ignored in the unit
      always_comb begin : prepare_input
        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
        end

        // override operand 0 for some conversions
        if (OpGroup == fpnew_pkg::CONV) begin
          // Source is an integer
          if (op_i == fpnew_pkg::I2F) begin
            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
          // vectorial F2F up casts
          end else if (op_i == fpnew_pkg::F2F) begin
            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
              // `+` binds tighter than `>>`: the shift amount is the lane offset plus
              // MAX_FP_WIDTH/2
              local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
                                                   MAX_FP_WIDTH/2;
            end
          // CPK
          end else if (dst_is_cpk) begin
            if (lane == 1) begin
              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
            end
          end
        end
      end

      // Instantiate the operation from the selected opgroup
      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
        fpnew_fma_multi #(
          .FpFmtConfig ( LANE_FORMATS         ),
          .NumPipeRegs ( NumPipeRegs          ),
          .PipeConfig  ( PipeConfig           ),
          .TagType     ( TagType              ),
          .AuxType     ( logic [AUX_BITS-1:0] )
        ) i_fpnew_fma_multi (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands      ),
          .is_boxed_i,
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .src_fmt_i,
          .dst_fmt_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]   ),
          .aux_i           ( aux_data            ),
          .in_valid_i      ( in_valid            ),
          .in_ready_o      ( lane_in_ready[lane] ),
          .flush_i,
          .result_o        ( op_result           ),
          .status_o        ( op_status           ),
          .extension_bit_o ( lane_ext_bit[lane]  ),
          .tag_o           ( lane_tags[lane]     ),
          .mask_o          ( lane_masks[lane]    ),
          .aux_o           ( lane_aux[lane]      ),
          .out_valid_o     ( out_valid           ),
          .out_ready_i     ( out_ready           ),
          .busy_o          ( lane_busy[lane]     )
        );

      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
        fpnew_divsqrt_multi #(
          .FpFmtConfig ( LANE_FORMATS         ),
          .NumPipeRegs ( NumPipeRegs          ),
          .PipeConfig  ( PipeConfig           ),
          .TagType     ( TagType              ),
          .AuxType     ( logic [AUX_BITS-1:0] )
        ) i_fpnew_divsqrt_multi (
          .clk_i,
          .rst_ni,
          .operands_i       ( local_operands[1:0] ), // 2 operands
          .is_boxed_i       ( is_boxed_2op        ), // 2 operands
          .rnd_mode_i,
          .op_i,
          .dst_fmt_i,
          .tag_i,
          .mask_i           ( simd_mask_i[lane]   ),
          .aux_i            ( aux_data            ),
          .in_valid_i       ( in_valid            ),
          .in_ready_o       ( lane_in_ready[lane] ),
          .divsqrt_done_o   ( divsqrt_done[lane]  ),
          .simd_synch_done_i( simd_synch_done     ),
          .divsqrt_ready_o  ( divsqrt_ready[lane] ),
          .simd_synch_rdy_i ( simd_synch_rdy      ),
          .flush_i,
          .result_o         ( op_result           ),
          .status_o         ( op_status           ),
          .extension_bit_o  ( lane_ext_bit[lane]  ),
          .tag_o            ( lane_tags[lane]     ),
          .mask_o           ( lane_masks[lane]    ),
          .aux_o            ( lane_aux[lane]      ),
          .out_valid_o      ( out_valid           ),
          .out_ready_i      ( out_ready           ),
          .busy_o           ( lane_busy[lane]     )
        );

      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
        // NOTE(review): no multi-format NONCOMP unit is instantiated here, so for
        // OpGroup == NONCOMP nothing drives op_result, op_status, lane_in_ready, etc.

      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
        fpnew_cast_multi #(
          .FpFmtConfig  ( LANE_FORMATS         ),
          .IntFmtConfig ( CONV_INT_FORMATS     ),
          .NumPipeRegs  ( NumPipeRegs          ),
          .PipeConfig   ( PipeConfig           ),
          .TagType      ( TagType              ),
          .AuxType      ( logic [AUX_BITS-1:0] )
        ) i_fpnew_cast_multi (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands[0]   ),
          .is_boxed_i      ( is_boxed_1op        ),
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .src_fmt_i,
          .dst_fmt_i,
          .int_fmt_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]   ),
          .aux_i           ( aux_data            ),
          .in_valid_i      ( in_valid            ),
          .in_ready_o      ( lane_in_ready[lane] ),
          .flush_i,
          .result_o        ( op_result           ),
          .status_o        ( op_status           ),
          .extension_bit_o ( lane_ext_bit[lane]  ),
          .tag_o           ( lane_tags[lane]     ),
          .mask_o          ( lane_masks[lane]    ),
          .aux_o           ( lane_aux[lane]      ),
          .out_valid_o     ( out_valid           ),
          .out_ready_i     ( out_ready           ),
          .busy_o          ( lane_busy[lane]     )
        );
      end // ADD OTHER OPTIONS HERE

      // Handshakes are only done if the lane is actually used
      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);

      // Properly NaN-box or sign-extend the slice result if not in use
      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;

    // Otherwise generate constant sign-extension
    end else begin : inactive_lane
      assign lane_out_valid[lane] = 1'b0; // unused lane
      assign lane_in_ready[lane]  = 1'b0; // unused lane
      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
      assign lane_status[lane]    = '0;
      assign lane_busy[lane]      = 1'b0;
    end

    // Generate result packing depending on float format
    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
      // Set up some constants
      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
      // only for active formats within the lane
      if (ACTIVE_FORMATS[fmt]) begin
        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
            local_result[FP_WIDTH-1:0];
      // format not active in this lane but its element still fits entirely: fill with ext bit
      end else if ((LANE+1)*FP_WIDTH <= Width) begin
        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
            '{default: lane_ext_bit[LANE]};
      // element only partially fits: fill the remaining upper bits with ext bit
      end else if (LANE*FP_WIDTH < Width) begin
        assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
            '{default: lane_ext_bit[LANE]};
      end
    end

    // Generate result packing depending on integer format
    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
        // Set up some constants
        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
        if (ACTIVE_INT_FORMATS[ifmt]) begin
          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
              local_result[INT_WIDTH-1:0];
        end else if ((LANE+1)*INT_WIDTH <= Width) begin
          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
        end else if (LANE*INT_WIDTH < Width) begin
          assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
        end
      end
    end
  end

  // Extend slice result if needed
  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
    // Set up some constants
    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
    if (NUM_LANES*FP_WIDTH < Width)
      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
  end

  // Mute int results if unused
  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
      assign ifmt_slice_result[ifmt] = '0;
    end
  end

  // Bypass lanes with target operand for vectorial casts
  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
    // Bypass pipeline signals, index i holds signal after i register stages
    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
    logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
    logic [0:NumPipeRegs]            byp_pipe_valid_q;
    // Ready signal is combinatorial for all stages
    logic [0:NumPipeRegs] byp_pipe_ready;

    // Input stage: First element of pipeline is taken from inputs
    assign byp_pipe_target_q[0] = conv_target_d;
    assign byp_pipe_aux_q[0]    = target_aux_d;
    assign byp_pipe_valid_q[0]  = in_valid_i & vectorial_op;

    // Generate the register stages
    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
      // Internal register enable for this stage
      logic reg_ena;
      // Determine the ready signal of the current stage - advance the pipeline:
      // 1. if the next stage is ready for our data
      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
      // Valid: enabled by ready signal, synchronous clear with the flush signal
      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
      // Enable register if pipeline ready and a valid data item is present
      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
      // Generate the pipeline registers within the stages, use enable-registers
      `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0)
      `FFL(byp_pipe_aux_q[i+1],    byp_pipe_aux_q[i],    reg_ena, '0)
    end

    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
    // Output stage: assign module outputs
    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];

    // decode the aux data
    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
  end else begin : no_conv
    assign {result_vec_op, result_is_cpk} = '0;
  end

  // Synch lanes if there is more than one
  assign simd_synch_rdy  = EnableVectors ? &divsqrt_ready : divsqrt_ready[0];
  assign simd_synch_done = EnableVectors ? &divsqrt_done  : divsqrt_done[0];

  // ------------
  // Output Side
  // ------------
  // The aux data that travelled through lane 0 tells how to interpret and pack the result
  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];

  assign result_o = result_fmt_is_int
                    ? ifmt_slice_result[result_fmt]
                    : fmt_slice_result[result_fmt];

  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
  assign tag_o           = lane_tags[0];    // don't care about upper ones
  assign busy_o          = (| lane_busy);
  assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones

  // Collapse the status
  always_comb begin : output_processing
    // Collapse the status: OR the flags of all lanes, each gated by the lane's output mask bit
    automatic fpnew_pkg::status_t temp_status;
    temp_status = '0;
    for (int i = 0; i < int'(NUM_LANES); i++)
      temp_status |= lane_status[i] & {5{lane_masks[i]}};
    status_o = temp_status;
  end

endmodule

View file

@ -1,495 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
package fpnew_pkg;
// ---------
// FP TYPES
// ---------
// | Enumerator | Format | Width | EXP_BITS | MAN_BITS
// |:----------:|------------------|-------:|:--------:|:--------:
// | FP32 | IEEE binary32 | 32 bit | 8 | 23
// | FP64 | IEEE binary64 | 64 bit | 11 | 52
// | FP16 | IEEE binary16 | 16 bit | 5 | 10
// | FP8 | binary8 | 8 bit | 5 | 2
// | FP16ALT | binary16alt | 16 bit | 8 | 7
// *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
// Encoding for a format
typedef struct packed {
int unsigned exp_bits;
int unsigned man_bits;
} fp_encoding_t;
localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);
// FP formats
typedef enum logic [FP_FORMAT_BITS-1:0] {
FP32 = 'd0,
FP64 = 'd1,
FP16 = 'd2,
FP8 = 'd3,
FP16ALT = 'd4
// add new formats here
} fp_format_e;
// Encodings for supported FP formats
localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{
'{8, 23}, // IEEE binary32 (single)
'{11, 52}, // IEEE binary64 (double)
'{5, 10}, // IEEE binary16 (half)
'{5, 2}, // custom binary8
'{8, 7} // custom binary16alt
// add new formats here
};
typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks)
typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
// ---------
// INT TYPES
// ---------
// | Enumerator | Width |
// |:----------:|-------:|
// | INT8 | 8 bit |
// | INT16 | 16 bit |
// | INT32 | 32 bit |
// | INT64 | 64 bit |
// *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
// Number of bits needed to encode an integer format index
localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS);
// Int formats; no explicit values are given, so the enumerators are numbered 0..3 in
// declaration order (INT8 = 0 ... INT64 = 3)
typedef enum logic [INT_FORMAT_BITS-1:0] {
INT8,
INT16,
INT32,
INT64
// add new formats here
} int_format_e;
// Maps an integer format enumerator to its width in bits.
// An unknown encoding raises $fatal in simulation and otherwise yields the
// numeric value of INT8 so that every path through the function assigns a result.
function automatic int unsigned int_width(int_format_e ifmt);
  automatic int unsigned width;
  unique case (ifmt)
    INT8:  width = 8;
    INT16: width = 16;
    INT32: width = 32;
    INT64: width = 64;
    default: begin
      // pragma translate_off
      $fatal(1, "Invalid INT format supplied");
      // pragma translate_on
      // Any integer will do here; the point is only to avoid latches.
      // Simulation is expected to catch the error above.
      width = INT8;
    end
  endcase
  return width;
endfunction
// Declared [0:N-1]: index 0 (INT8) is the LEFTMOST bit of a mask literal
typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks)
// --------------
// FP OPERATIONS
// --------------
localparam int unsigned NUM_OPGROUPS = 4;
// Each FP operation belongs to an operation group
// (enumerators are numbered 0..3 in declaration order; see get_opgroup() for the mapping)
typedef enum logic [1:0] {
ADDMUL, DIVSQRT, NONCOMP, CONV
} opgroup_e;
// Width of the operation encoding; 15 of the 16 possible values are used below
localparam int unsigned OP_BITS = 4;
typedef enum logic [OP_BITS-1:0] {
FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group
DIV, SQRT, // DIVSQRT operation group
SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group
} operation_e;
// -------------------
// RISC-V FP-SPECIFIC
// -------------------
// Rounding modes (encoding 3'b110 is not assigned to any enumerator)
typedef enum logic [2:0] {
RNE = 3'b000,
RTZ = 3'b001,
RDN = 3'b010,
RUP = 3'b011,
RMM = 3'b100,
ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC
DYN = 3'b111
} roundmode_e;
// Status flags; packed struct, so NV is the most significant of the 5 bits and NX the least
typedef struct packed {
logic NV; // Invalid
logic DZ; // Divide by zero
logic OF; // Overflow
logic UF; // Underflow
logic NX; // Inexact
} status_t;
// Information about a floating point value
typedef struct packed {
logic is_normal; // is the value normal
logic is_subnormal; // is the value subnormal
logic is_zero; // is the value zero
logic is_inf; // is the value infinity
logic is_nan; // is the value NaN
logic is_signalling; // is the value a signalling NaN
logic is_quiet; // is the value a quiet NaN
logic is_boxed; // is the value properly NaN-boxed (RISC-V specific)
} fp_info_t;
// Classification mask: one-hot 10-bit encoding, one bit per value class
typedef enum logic [9:0] {
NEGINF = 10'b00_0000_0001,
NEGNORM = 10'b00_0000_0010,
NEGSUBNORM = 10'b00_0000_0100,
NEGZERO = 10'b00_0000_1000,
POSZERO = 10'b00_0001_0000,
POSSUBNORM = 10'b00_0010_0000,
POSNORM = 10'b00_0100_0000,
POSINF = 10'b00_1000_0000,
SNAN = 10'b01_0000_0000,
QNAN = 10'b10_0000_0000
} classmask_e;
// ------------------
// FPU configuration
// ------------------
// Pipelining registers can be inserted (at elaboration time) into operational units
typedef enum logic [1:0] {
BEFORE, // registers are inserted at the inputs of the unit
AFTER, // registers are inserted at the outputs of the unit
INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit
DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
} pipe_config_t;
// Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
typedef enum logic [1:0] {
DISABLED, // arithmetic units are not generated
PARALLEL, // arithmetic units are generated in parallel slices, one for each format
MERGED // arithmetic units are contained within a merged unit holding multiple formats
} unit_type_t;
// Array of unit types indexed by format
typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
// Array of format-specific unit types by opgroup
typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
// same with unsigned
typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
// FPU configuration: features
// FpFmtMask / IntFmtMask are indexed [0:N-1], so in the literals below the leftmost bit
// corresponds to the first enumerator (FP32 resp. INT8) and the rightmost to the last.
typedef struct packed {
int unsigned Width;
logic EnableVectors;
logic EnableNanBox;
fmt_logic_t FpFmtMask;
ifmt_logic_t IntFmtMask;
} fpu_features_t;
// FP32 + FP64, INT32 + INT64, no vectors
localparam fpu_features_t RV64D = '{
Width: 64,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b11000,
IntFmtMask: 4'b0011
};
// FP32 + FP64, INT32 only
// NOTE(review): EnableVectors is set here although it is clear in RV64D and RV32F - confirm intended
localparam fpu_features_t RV32D = '{
Width: 64,
EnableVectors: 1'b1,
EnableNanBox: 1'b1,
FpFmtMask: 5'b11000,
IntFmtMask: 4'b0010
};
// FP32 only, INT32 only, no vectors
localparam fpu_features_t RV32F = '{
Width: 32,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
};
// All five FP formats and all four INT formats, vectors enabled
localparam fpu_features_t RV64D_Xsflt = '{
Width: 64,
EnableVectors: 1'b1,
EnableNanBox: 1'b1,
FpFmtMask: 5'b11111,
IntFmtMask: 4'b1111
};
// FP32, FP16, FP8, FP16ALT (no FP64); INT8, INT16, INT32 (no INT64); vectors enabled
localparam fpu_features_t RV32F_Xsflt = '{
Width: 32,
EnableVectors: 1'b1,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10111,
IntFmtMask: 4'b1110
};
// FP32 + FP16ALT; INT16 + INT32; vectors enabled
localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
Width: 32,
EnableVectors: 1'b1,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10001,
IntFmtMask: 4'b0110
};
// FPU configuration: implementation
// PipeRegs and UnitTypes are indexed first by operation group, then by FP format
typedef struct packed {
opgrp_fmt_unsigned_t PipeRegs;
opgrp_fmt_unit_types_t UnitTypes;
pipe_config_t PipeConfig;
} fpu_implementation_t;
// No pipeline registers anywhere; DIVSQRT and CONV use merged multi-format units
localparam fpu_implementation_t DEFAULT_NOREGS = '{
PipeRegs: '{default: 0},
UnitTypes: '{'{default: PARALLEL}, // ADDMUL
'{default: MERGED}, // DIVSQRT
'{default: PARALLEL}, // NONCOMP
'{default: MERGED}}, // CONV
PipeConfig: BEFORE
};
// One pipeline register per unit; the DIVSQRT group is not generated
localparam fpu_implementation_t DEFAULT_SNITCH = '{
PipeRegs: '{default: 1},
UnitTypes: '{'{default: PARALLEL}, // ADDMUL
'{default: DISABLED}, // DIVSQRT
'{default: PARALLEL}, // NONCOMP
'{default: MERGED}}, // CONV
PipeConfig: BEFORE
};
// -----------------------
// Synthesis optimization
// -----------------------
localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
// -------------------------
// General helper functions
// -------------------------
// Returns the smaller of two signed integers (b when they are equal)
function automatic int minimum(int a, int b);
  if (a < b) begin
    return a;
  end
  return b;
endfunction
// Returns the larger of two signed integers (b when they are equal)
function automatic int maximum(int a, int b);
  if (a > b) begin
    return a;
  end
  return b;
endfunction
// -------------------------------------------
// Helper functions for FP formats and values
// -------------------------------------------
// Total storage width of a FP format: exponent bits + mantissa bits + one sign bit
function automatic int unsigned fp_width(fp_format_e fmt);
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.exp_bits + enc.man_bits + 1;
endfunction
// Width in bits of the widest FP format enabled in cfg (0 if no format is enabled)
function automatic int unsigned max_fp_width(fmt_logic_t cfg);
  automatic int unsigned widest = 0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin
      widest = unsigned'(maximum(widest, fp_width(fp_format_e'(idx))));
    end
  end
  return widest;
endfunction
// Width in bits of the narrowest FP format enabled in cfg.
// The search starts from the widest enabled width, so an empty cfg yields 0.
function automatic int unsigned min_fp_width(fmt_logic_t cfg);
  automatic int unsigned narrowest = max_fp_width(cfg);
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin
      narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(idx))));
    end
  end
  return narrowest;
endfunction
// Returns the number of exponent bits for a format
function automatic int unsigned exp_bits(fp_format_e fmt);
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.exp_bits;
endfunction
// Returns the number of mantissa bits for a format
function automatic int unsigned man_bits(fp_format_e fmt);
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.man_bits;
endfunction
// Exponent bias of a format as per IEEE 754-2008: 2^(exp_bits-1) - 1
function automatic int unsigned bias(fp_format_e fmt);
  automatic int unsigned ebits = FP_ENCODINGS[fmt].exp_bits;
  return unsigned'(2**(ebits-1)-1); // symmetrical bias
endfunction
// Builds an encoding wide enough for every format enabled in cfg: the result
// carries the largest exponent width and the largest mantissa width found
// among the active formats (both 0 when cfg is empty).
function automatic fp_encoding_t super_format(fmt_logic_t cfg);
  automatic fp_encoding_t merged;
  merged = '0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin // only active format
      merged.exp_bits = unsigned'(maximum(merged.exp_bits, exp_bits(fp_format_e'(idx))));
      merged.man_bits = unsigned'(maximum(merged.man_bits, man_bits(fp_format_e'(idx))));
    end
  end
  return merged;
endfunction
// -------------------------------------------
// Helper functions for INT formats and values
// -------------------------------------------
// Returns the width in bits of the widest INT format enabled in cfg
// (0 if no format is enabled).
// The loop index is unsigned and the result of maximum() is cast explicitly,
// matching max_fp_width() and avoiding implicit signed/unsigned conversions.
function automatic int unsigned max_int_width(ifmt_logic_t cfg);
  automatic int unsigned res = 0;
  for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin
    if (cfg[ifmt]) begin
      res = unsigned'(maximum(res, int_width(int_format_e'(ifmt))));
    end
  end
  return res;
endfunction
// --------------------------------------------------
// Helper functions for operations and FPU structure
// --------------------------------------------------
// Maps an operation to the operation group that implements it.
// Encodings that match no listed operation fall back to NONCOMP.
function automatic opgroup_e get_opgroup(operation_e op);
  automatic opgroup_e grp;
  unique case (op)
    FMADD, FNMSUB, ADD, MUL:     grp = ADDMUL;
    DIV, SQRT:                   grp = DIVSQRT;
    SGNJ, MINMAX, CMP, CLASSIFY: grp = NONCOMP;
    F2F, F2I, I2F, CPKAB, CPKCD: grp = CONV;
    default:                     grp = NONCOMP;
  endcase
  return grp;
endfunction
// Number of operands consumed by units of the given operation group
// (0 for an encoding that matches no group)
function automatic int unsigned num_operands(opgroup_e grp);
  automatic int unsigned count;
  unique case (grp)
    ADDMUL:  count = 3;
    DIVSQRT: count = 2;
    NONCOMP: count = 2;
    CONV:    count = 3; // vectorial casts use 3 operands
    default: count = 0;
  endcase
  return count;
endfunction
// Number of lanes for one format: as many whole elements of that format as fit
// into width when vectors are enabled, otherwise a single lane
function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
  automatic int unsigned lanes;
  lanes = vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane
  return lanes;
endfunction
// Largest lane count over all enabled formats: determined by the narrowest
// enabled format when vectors are enabled, otherwise a single lane
function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
  automatic int unsigned lanes;
  lanes = vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane
  return lanes;
endfunction
// Mask of the enabled FP formats that occupy lane lane_no of a multiformat slice.
// A format is present in a lane if more than lane_no elements of it fit into width.
function automatic fmt_logic_t get_lane_formats(int unsigned width,
                                                fmt_logic_t  cfg,
                                                int unsigned lane_no);
  automatic fmt_logic_t  present;
  automatic int unsigned fmt_lanes;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    // Number of lanes this format provides at the given datapath width
    fmt_lanes    = width / fp_width(fp_format_e'(idx));
    present[idx] = cfg[idx] & (fmt_lanes > lane_no);
  end
  return present;
endfunction
// Mask of the enabled INT formats that occupy lane lane_no of a multiformat slice.
// An INT format is present if it is enabled in icfg and some FP format of the
// same width is present in that lane.
function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
                                                     fmt_logic_t  cfg,
                                                     ifmt_logic_t icfg,
                                                     int unsigned lane_no);
  automatic ifmt_logic_t present;
  automatic fmt_logic_t  lane_fp_fmts;
  present      = '0;
  lane_fp_fmts = get_lane_formats(width, cfg, lane_no);
  for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
      // Only FP formats whose width equals the INT width can contribute
      present[i] |= icfg[i] && lane_fp_fmts[f] &&
                    (fp_width(fp_format_e'(f)) == int_width(int_format_e'(i)));
    end
  end
  return present;
endfunction
// Mask of the enabled FP formats that occupy lane lane_no of a CONV slice.
// In addition to the formats that fit by width, formats flagged in CPK_FORMATS
// are always present in the first two lanes.
function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
                                                     fmt_logic_t  cfg,
                                                     int unsigned lane_no);
  automatic fmt_logic_t present;
  automatic logic       fits_lane;
  automatic logic       cpk_lane;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    // Regular rule: the format provides more than lane_no lanes
    fits_lane    = (width / fp_width(fp_format_e'(idx)) > lane_no);
    // CPK rule: at least two lanes for CPK-capable formats
    cpk_lane     = (CPK_FORMATS[idx] && (lane_no < 2));
    present[idx] = cfg[idx] && (fits_lane || cpk_lane);
  end
  return present;
endfunction
// Mask of the enabled INT formats that occupy lane lane_no of a CONV slice.
// An INT format is present if it is enabled in icfg and some FP format of the
// same width is present in that CONV lane.
function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
                                                          fmt_logic_t  cfg,
                                                          ifmt_logic_t icfg,
                                                          int unsigned lane_no);
  automatic ifmt_logic_t present;
  automatic fmt_logic_t  lane_fp_fmts;
  present      = '0;
  lane_fp_fmts = get_conv_lane_formats(width, cfg, lane_no);
  for (int unsigned i = 0; i < NUM_INT_FORMATS; i++) begin
    for (int unsigned f = 0; f < NUM_FP_FORMATS; f++) begin
      // Only FP formats whose width equals the INT width can contribute
      if (fp_width(fp_format_e'(f)) == int_width(int_format_e'(i))) begin
        present[i] |= icfg[i] && lane_fp_fmts[f];
      end
    end
  end
  return present;
endfunction
// Returns 1 if at least one format enabled in cfg has unit type MERGED
function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
  automatic logic found = 1'b0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx] && types[idx] == MERGED) begin
      found = 1'b1;
    end
  end
  return found;
endfunction
// Returns whether fmt is the lowest-indexed format that is both enabled in cfg
// and configured as MERGED; 0 if no such format exists
function automatic logic is_first_enabled_multi(fp_format_e      fmt,
                                                fmt_unit_types_t types,
                                                fmt_logic_t      cfg);
  automatic logic seen_first = 1'b0;
  automatic logic is_first   = 1'b0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    // Only the first matching format decides the result
    if (!seen_first && cfg[idx] && types[idx] == MERGED) begin
      is_first   = (fp_format_e'(idx) == fmt);
      seen_first = 1'b1;
    end
  end
  return is_first;
endfunction
// Returns the lowest-indexed format that is enabled in cfg and configured as
// MERGED; format 0 is returned when there is none
function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
  automatic fp_format_e first = fp_format_e'(0);
  // Scan from the highest index down so the last match written is the lowest index
  for (int idx = int'(NUM_FP_FORMATS) - 1; idx >= 0; idx--) begin
    if (cfg[idx] && types[idx] == MERGED) begin
      first = fp_format_e'(idx);
    end
  end
  return first;
endfunction
// Returns the largest pipeline register count among the formats that are
// enabled in cfg and configured as MERGED (0 if there is none).
// The result of maximum() is cast back to unsigned explicitly, matching
// max_fp_width() and avoiding an implicit signed-to-unsigned conversion.
function automatic int unsigned get_num_regs_multi(fmt_unsigned_t   regs,
                                                   fmt_unit_types_t types,
                                                   fmt_logic_t      cfg);
  automatic int unsigned res = 0;
  for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
    if (cfg[i] && types[i] == MERGED) begin
      res = unsigned'(maximum(res, regs[i]));
    end
  end
  return res;
endfunction
endpackage

View file

@ -1,76 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
// Rounding stage: given an unsigned magnitude, its sign and the round/sticky
// bits, decides whether to increment the magnitude for the selected rounding
// mode and reports whether the unrounded value is an exact zero.
module fpnew_rounding #(
  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
) (
  // Input value
  input  logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
  input  logic                  sign_i,
  // Rounding information
  input  logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
  input  fpnew_pkg::roundmode_e rnd_mode_i,
  input  logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
  // Output value
  output logic [AbsWidth-1:0]   abs_rounded_o,           // absolute value without sign
  output logic                  sign_o,
  // Output classification
  output logic                  exact_zero_o             // output is an exact zero
);

  logic round_up; // Rounding decision

  // An exact zero is a zero magnitude with no discarded bits (round and sticky both clear)
  assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0);

  // Rounding decision per mode
  // RoundMode | Mnemonic | Meaning
  // :--------:|:--------:|:-------
  //    000    |   RNE    | Round to Nearest, ties to Even
  //    001    |   RTZ    | Round towards Zero
  //    010    |   RDN    | Round Down (towards -\infty)
  //    011    |   RUP    | Round Up (towards \infty)
  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
  //    101    |   ROD    | Round towards odd (this mode is not defined in RISC-V FP-SPEC)
  //  others   |          | *invalid*
  always_comb begin : rounding_decision
    unique case (rnd_mode_i)
      fpnew_pkg::RNE: begin
        // Decide according to round/sticky bits
        unique case (round_sticky_bits_i)
          2'b00,
          2'b01:   round_up = 1'b0;                  // < ulp/2 away, round down
          2'b10:   round_up = abs_value_i[0];        // = ulp/2 away, round towards even result
          2'b11:   round_up = 1'b1;                  // > ulp/2 away, round up
          default: round_up = fpnew_pkg::DONT_CARE;
        endcase
      end
      fpnew_pkg::RTZ: begin
        round_up = 1'b0; // always round down
      end
      fpnew_pkg::RDN: begin
        round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if -
      end
      fpnew_pkg::RUP: begin
        round_up = (| round_sticky_bits_i) ? ~sign_i : 1'b0; // to 0 if -, away if +
      end
      fpnew_pkg::RMM: begin
        round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
      end
      fpnew_pkg::ROD: begin
        // Increment only when inexact and the LSB is currently even
        round_up = ~abs_value_i[0] & (| round_sticky_bits_i);
      end
      default: begin
        round_up = fpnew_pkg::DONT_CARE; // propagate x
      end
    endcase
  end

  // The increment carries into the exponent field, so exponent change and
  // overflow to infinity happen without extra logic
  assign abs_rounded_o = abs_value_i + round_up;

  // For an effective subtraction (operand signs differed) that yields an exact
  // zero, the sign is '-' under RDN and '+' under every other mode; otherwise
  // the incoming sign is passed through.
  assign sign_o = (exact_zero_o && effective_subtraction_i)
                  ? (rnd_mode_i == fpnew_pkg::RDN)
                  : sign_i;

endmodule

View file

@ -1,185 +0,0 @@
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
// Top level of the FPU: instantiates one fpnew_opgroup_block per operation group,
// steers each incoming operation to the group selected by op_i, and merges the
// group outputs onto a single result port through an rr_arb_tree arbiter.
module fpnew_top #(
// FPU configuration
parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt,
parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
parameter type TagType = logic,
parameter int unsigned TrueSIMDClass = 0,
parameter int unsigned EnableSIMDMask = 0,
// Do not change
localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors),
localparam type MaskType = logic [NumLanes-1:0],
localparam int unsigned WIDTH = Features.Width,
localparam int unsigned NUM_OPERANDS = 3
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input fpnew_pkg::int_format_e int_fmt_i,
input logic vectorial_op_i,
input TagType tag_i,
input MaskType simd_mask_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
output fpnew_pkg::status_t status_o,
output TagType tag_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o
);
localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS;
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS;
// ----------------
// Type Definition
// ----------------
// Bundle of everything one operation group hands to the output arbiter
typedef struct packed {
logic [WIDTH-1:0] result;
fpnew_pkg::status_t status;
TagType tag;
} output_t;
// Handshake signals for the blocks
// (opgrp_ext is driven by the blocks' extension_bit_o but not read in this module)
logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy;
output_t [NUM_OPGROUPS-1:0] opgrp_outputs;
// Per-format, per-operand NaN-boxing check result
logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed;
// -----------
// Input Side
// -----------
// Ready is only signalled while valid is asserted, and reflects the ready of the
// operation group that op_i maps to
assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)];
// NaN-boxing check
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
// NaN boxing is only generated if it's enabled and needed
if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check
for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
// Scalar operands count as boxed when all bits above the format width are 1;
// vectorial operations are always treated as boxed
assign is_boxed[fmt][op] = (!vectorial_op_i)
? operands_i[op][WIDTH-1:FP_WIDTH] == '1
: 1'b1;
end
end else begin : no_check
// Check disabled or format fills the whole datapath: always boxed
assign is_boxed[fmt] = '1;
end
end
// Filter out the mask if not used
// (the logic' cast keeps only bit 0 of EnableSIMDMask; when that bit is 0 every
// lane of simd_mask is forced to 1, otherwise simd_mask_i passes through)
MaskType simd_mask;
assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}};
// -------------------------
// Generate Operation Blocks
// -------------------------
for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups
// Number of operands this group consumes; only the low NUM_OPS operands are forwarded
localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));
logic in_valid;
logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed;
// Valid is gated so that only the group matching op_i sees the request
assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp));
// slice out input boxing
always_comb begin : slice_inputs
for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++)
input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0];
end
fpnew_opgroup_block #(
.OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ),
.Width ( WIDTH ),
.EnableVectors ( Features.EnableVectors ),
.FpFmtMask ( Features.FpFmtMask ),
.IntFmtMask ( Features.IntFmtMask ),
.FmtPipeRegs ( Implementation.PipeRegs[opgrp] ),
.FmtUnitTypes ( Implementation.UnitTypes[opgrp] ),
.PipeConfig ( Implementation.PipeConfig ),
.TagType ( TagType ),
.TrueSIMDClass ( TrueSIMDClass )
) i_opgroup_block (
.clk_i,
.rst_ni,
.operands_i ( operands_i[NUM_OPS-1:0] ),
.is_boxed_i ( input_boxed ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.int_fmt_i,
.vectorial_op_i,
.tag_i,
.simd_mask_i ( simd_mask ),
.in_valid_i ( in_valid ),
.in_ready_o ( opgrp_in_ready[opgrp] ),
.flush_i,
.result_o ( opgrp_outputs[opgrp].result ),
.status_o ( opgrp_outputs[opgrp].status ),
.extension_bit_o ( opgrp_ext[opgrp] ),
.tag_o ( opgrp_outputs[opgrp].tag ),
.out_valid_o ( opgrp_out_valid[opgrp] ),
.out_ready_i ( opgrp_out_ready[opgrp] ),
.busy_o ( opgrp_busy[opgrp] )
);
end
// ------------------
// Arbitrate Outputs
// ------------------
output_t arbiter_output;
// Round-Robin arbiter to decide which result to use
// (group valids are the requests, the grants are fed back as the groups' out_ready)
rr_arb_tree #(
.NumIn ( NUM_OPGROUPS ),
.DataType ( output_t ),
.AxiVldRdy ( 1'b1 )
) i_arbiter (
.clk_i,
.rst_ni,
.flush_i,
.rr_i ( '0 ),
.req_i ( opgrp_out_valid ),
.gnt_o ( opgrp_out_ready ),
.data_i ( opgrp_outputs ),
.gnt_i ( out_ready_i ),
.req_o ( out_valid_o ),
.data_o ( arbiter_output ),
.idx_o ( /* unused */ )
);
// Unpack output
assign result_o = arbiter_output.result;
assign status_o = arbiter_output.status;
assign tag_o = arbiter_output.tag;
// Busy as soon as any operation group reports busy
assign busy_o = (| opgrp_busy);
endmodule

View file

@ -1,25 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
In this sense, we interpret the "Public API" of a hardware module as its port/parameter list.
Versions of the IP in the same major release are "pin-compatible" with each other. Minor releases are permitted to add new parameters as long as their default bindings ensure backwards compatibility.
## [Unreleased]
### Added
### Changed
### Fixed
## [1.0.4] - 2020-10-05
### Added
- Started formal changelog
### Changed
### Fixed
- Signalling behavior for underflows to 0.0

View file

@ -1,176 +0,0 @@
SOLDERPAD HARDWARE LICENSE version 0.51
This license is based closely on the Apache License Version 2.0, but is not
approved or endorsed by the Apache Foundation. A copy of the non-modified
Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
As this license is not currently OSI or FSF approved, the Licensor permits any
Work licensed under this License, at the option of the Licensee, to be treated
as licensed under the Apache License Version 2.0 (which is so approved).
This License is licensed under the terms of this License and in particular
clause 7 below (Disclaimer of Warranties) applies in relation to its use.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the Rights owner or entity authorized by the Rights owner
that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities
that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or
indirect, to cause the direction or management of such entity, whether by
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.
"Rights" means copyright and any similar right including design right (whether
registered or unregistered), semiconductor topography (mask) rights and
database rights (but excluding Patents and Trademarks).
"Source" form shall mean the preferred form for making modifications, including
but not limited to source code, net lists, board layouts, CAD files,
documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object
code, generated documentation, the instantiation of a hardware design and
conversions to other media types, including intermediate forms such as
bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
works).
"Work" shall mean the work of authorship, whether in Source form or other
Object form, made available under the License, as indicated by a Rights notice
that is included in or attached to the work (an example is provided in the
Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that
is based on (or derived from) the Work and for which the editorial revisions,
annotations, elaborations, or other modifications represent, as a whole, an
original work of authorship. For the purposes of this License, Derivative Works
shall not include works that remain separable from, or merely link (or bind by
name) or physically connect to or interoperate with the interfaces of, the Work
and Derivative Works thereof.
"Contribution" shall mean any design or work of authorship, including the
original version of the Work and any modifications or additions to that Work or
Derivative Works thereof, that is intentionally submitted to Licensor for
inclusion in the Work by the Rights owner or by an individual or Legal Entity
authorized to submit on behalf of the Rights owner. For the purposes of this
definition, "submitted" means any form of electronic, verbal, or written
communication sent to the Licensor or its representatives, including but not
limited to communication on electronic mailing lists, source code control
systems, and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but excluding
communication that is conspicuously marked or otherwise designated in writing
by the Rights owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
of whom a Contribution has been received by Licensor and subsequently
incorporated within the Work.
2. Grant of License. Subject to the terms and conditions of this License, each
Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable license under the Rights to reproduce,
prepare Derivative Works of, publicly display, publicly perform, sublicense,
and distribute the Work and such Derivative Works in Source or Object form and
do anything in relation to the Work as if the Rights did not exist.
3. Grant of Patent License. Subject to the terms and conditions of this
License, each Contributor hereby grants to You a perpetual, worldwide,
non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
section) patent license to make, have made, use, offer to sell, sell, import,
and otherwise transfer the Work, where such license applies only to those
patent claims licensable by such Contributor that are necessarily infringed by
their Contribution(s) alone or by combination of their Contribution(s) with the
Work to which such Contribution(s) was submitted. If You institute patent
litigation against any entity (including a cross-claim or counterclaim in a
lawsuit) alleging that the Work or a Contribution incorporated within the Work
constitutes direct or contributory patent infringement, then any patent
licenses granted to You under this License for that Work shall terminate as of
the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the Work or
Derivative Works thereof in any medium, with or without modifications, and in
Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy
of this License; and
You must cause any modified files to carry prominent notices stating that
You changed the files; and
You must retain, in the Source form of any Derivative Works that You
distribute, all copyright, patent, trademark, and attribution notices from
the Source form of the Work, excluding those notices that do not pertain to
any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then
any Derivative Works that You distribute must include a readable copy of
the attribution notices contained within such NOTICE file, excluding those
notices that do not pertain to any part of the Derivative Works, in at
least one of the following places: within a NOTICE text file distributed as
part of the Derivative Works; within the Source form or documentation, if
provided along with the Derivative Works; or, within a display generated by
the Derivative Works, if and wherever such third-party notices normally
appear. The contents of the NOTICE file are for informational purposes only
and do not modify the License. You may add Your own attribution notices
within Derivative Works that You distribute, alongside or as an addendum to
the NOTICE text from the Work, provided that such additional attribution
notices cannot be construed as modifying the License. You may add Your own
copyright statement to Your modifications and may provide additional or
different license terms and conditions for use, reproduction, or
distribution of Your modifications, or for any such Derivative Works as a
whole, provided Your use, reproduction, and distribution of the Work
otherwise complies with the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise, any
Contribution intentionally submitted for inclusion in the Work by You to the
Licensor shall be under the terms and conditions of this License, without any
additional terms or conditions. Notwithstanding the above, nothing herein shall
supersede or modify the terms of any separate license agreement you may have
executed with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade names,
trademarks, service marks, or product names of the Licensor, except as required
for reasonable and customary use in describing the origin of the Work and
reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
writing, Licensor provides the Work (and each Contributor provides its
Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied, including, without limitation, any warranties
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any risks
associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory, whether in
tort (including negligence), contract, or otherwise, unless required by
applicable law (such as deliberate and grossly negligent acts) or agreed to in
writing, shall any Contributor be liable to You for damages, including any
direct, indirect, special, incidental, or consequential damages of any
character arising as a result of this License or out of the use or inability to
use the Work (including but not limited to damages for loss of goodwill, work
stoppage, computer failure or malfunction, or any and all other commercial
damages or losses), even if such Contributor has been advised of the
possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing the Work or
Derivative Works thereof, You may choose to offer, and charge a fee for,
acceptance of support, warranty, indemnity, or other liability obligations
and/or rights consistent with this License. However, in accepting such
obligations, You may act only on Your own behalf and on Your sole
responsibility, not on behalf of any other Contributor, and only if You agree
to indemnify, defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason of your
accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

View file

@ -1,2 +0,0 @@
*~
*/*~

File diff suppressed because it is too large Load diff

View file

@ -1,83 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// This file contains all div_sqrt_top_mvp parameters
// Authors : Lei Li (lile@iis.ee.ethz.ch)
// Constants shared by the div/sqrt unit: widths of the control fields,
// rounding-mode encodings, and, for each supported floating-point format
// (FP64, FP32, FP16, FP16alt), the field sizes and special bit patterns.
// Many literals are explicitly sized; their widths are part of the contract
// with the modules that compare against or concatenate them.
package defs_div_sqrt_mvp;
// op command
// C_RM is the bit width of the rounding-mode ports (RM_SI); the C_RM_*
// values are the encodings decoded by the rounding logic.
localparam C_RM = 3;
localparam C_RM_NEAREST = 3'h0;
localparam C_RM_TRUNC = 3'h1;
localparam C_RM_PLUSINF = 3'h2;
localparam C_RM_MINUSINF = 3'h3;
// Bit widths of the Precision_ctl_SI and Format_sel_SI control ports.
localparam C_PC = 6; // Precision Control
localparam C_FS = 2; // Format Selection
localparam C_IUNC = 2; // Iteration Unit Number Control
// NOTE(review): not referenced by the modules visible alongside this package;
// presumably selects the number of iteration units in control_mvp - confirm.
localparam Iteration_unit_num_S = 2'b10;
// FP64
// C_OP = total operand width, C_MANT = stored mantissa bits (no hidden bit),
// C_EXP = exponent bits, C_BIAS = exponent bias.
localparam C_OP_FP64 = 64;
localparam C_MANT_FP64 = 52;
localparam C_EXP_FP64 = 11;
localparam C_BIAS_FP64 = 1023;
// Bias plus one (1024) and half the bias rounded down (511).
localparam C_BIAS_AONE_FP64 = 11'h400;
localparam C_HALF_BIAS_FP64 = 511;
localparam C_EXP_ZERO_FP64 = 11'h000;
localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm
// All-ones exponent (infinity / NaN encoding).
localparam C_EXP_INF_FP64 = 11'h7FF;
localparam C_MANT_ZERO_FP64 = 52'h0;
// Mantissa with only the most significant bit set; the normaliser uses it as
// the mantissa of the NaN it returns.
localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000;
// Full-width bit patterns: +0, -0, and a NaN with the top mantissa bit set.
localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000;
localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000;
localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000;
// FP32
// Same layout of constants as the FP64 group, for a 32-bit format with
// 8 exponent bits and 23 mantissa bits.
localparam C_OP_FP32 = 32;
localparam C_MANT_FP32 = 23;
localparam C_EXP_FP32 = 8;
localparam C_BIAS_FP32 = 127;
localparam C_BIAS_AONE_FP32 = 8'h80;
localparam C_HALF_BIAS_FP32 = 63;
localparam C_EXP_ZERO_FP32 = 8'h00;
localparam C_EXP_INF_FP32 = 8'hFF;
localparam C_MANT_ZERO_FP32 = 23'h0;
localparam C_PZERO_FP32 = 32'h0000_0000;
localparam C_MZERO_FP32 = 32'h8000_0000;
localparam C_QNAN_FP32 = 32'h7FC0_0000;
// FP16
// 16-bit format with 5 exponent bits and 10 mantissa bits.
localparam C_OP_FP16 = 16;
localparam C_MANT_FP16 = 10;
localparam C_EXP_FP16 = 5;
localparam C_BIAS_FP16 = 15;
localparam C_BIAS_AONE_FP16 = 5'h10;
localparam C_HALF_BIAS_FP16 = 7;
localparam C_EXP_ZERO_FP16 = 5'h00;
localparam C_EXP_INF_FP16 = 5'h1F;
localparam C_MANT_ZERO_FP16 = 10'h0;
localparam C_PZERO_FP16 = 16'h0000;
localparam C_MZERO_FP16 = 16'h8000;
localparam C_QNAN_FP16 = 16'h7E00;
// FP16alt
// Alternative 16-bit format with 8 exponent bits and 7 mantissa bits
// (same exponent range as FP32). No +0/-0 patterns are defined for it.
localparam C_OP_FP16ALT = 16;
localparam C_MANT_FP16ALT = 7;
localparam C_EXP_FP16ALT = 8;
localparam C_BIAS_FP16ALT = 127;
localparam C_BIAS_AONE_FP16ALT = 8'h80;
localparam C_HALF_BIAS_FP16ALT = 63;
localparam C_EXP_ZERO_FP16ALT = 8'h00;
localparam C_EXP_INF_FP16ALT = 8'hFF;
localparam C_MANT_ZERO_FP16ALT = 7'h0;
localparam C_QNAN_FP16ALT = 16'h7FC0;
endpackage : defs_div_sqrt_mvp

View file

@ -1,232 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li -- lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 20/04/2018 //
// Design Name: FPU //
// Module Name: div_sqrt_mvp_wrapper.sv //
// Project Name: The shared divisor and square root //
// Language: SystemVerilog //
// //
// Description: The wrapper of div_sqrt_top_mvp //
// //
// //
// //
// //
// //
// //
// //
// //
// //
// //
// //
////////////////////////////////////////////////////////////////////////////////
import defs_div_sqrt_mvp::*;
// Pipelining wrapper around div_sqrt_top_mvp.
//
// Parameters
//   PrePipeline_depth_S  : when equal to 1, every data/control input is
//                          registered once before reaching div_sqrt_top_mvp;
//                          any other value connects the inputs directly.
//   PostPipeline_depth_S : when equal to 2, the result, flags, ready and done
//                          outputs pass through two register stages; any other
//                          value yields a single register stage.
//
// All registers are cleared by Rst_RBI (asynchronous, active low) and
// clocked on the rising edge of Clk_CI.
module div_sqrt_mvp_wrapper
#(
   parameter   PrePipeline_depth_S   = 0,  // If you want to add a flip/flop stage before preprocess, set it to 1.
   parameter   PostPipeline_depth_S  = 2   // The output delay stages
)
  (//Input
   input logic                            Clk_CI,
   input logic                            Rst_RBI,
   input logic                            Div_start_SI,
   input logic                            Sqrt_start_SI,
   //Input Operands
   input logic [C_OP_FP64-1:0]            Operand_a_DI,
   input logic [C_OP_FP64-1:0]            Operand_b_DI,
   // Input Control
   input logic [C_RM-1:0]                 RM_SI,            // Rounding Mode
   input logic [C_PC-1:0]                 Precision_ctl_SI, // Precision Control
   input logic [C_FS-1:0]                 Format_sel_SI,    // Format Selection
   input logic                            Kill_SI,
   //Output Result
   output logic [C_OP_FP64-1:0]           Result_DO,
   //Output-Flags
   output logic [4:0]                     Fflags_SO,
   output logic                           Ready_SO,
   output logic                           Done_SO
 );

  // Registered copies of the inputs; only driven when PrePipeline_depth_S==1.
  logic                                   Div_start_S_S,Sqrt_start_S_S;
  logic [C_OP_FP64-1:0]                   Operand_a_S_D;
  logic [C_OP_FP64-1:0]                   Operand_b_S_D;
  logic [C_RM-1:0]                        RM_S_S;            // Rounding Mode
  logic [C_PC-1:0]                        Precision_ctl_S_S; // Precision Control
  logic [C_FS-1:0]                        Format_sel_S_S;    // Format Selection
  logic                                   Kill_S_S;

  // Raw outputs of div_sqrt_top_mvp, before the output pipeline.
  logic [C_OP_FP64-1:0]                   Result_D;
  logic                                   Ready_S;
  logic                                   Done_S;
  logic [4:0]                             Fflags_S;

  generate
    if(PrePipeline_depth_S==1)
      begin
        div_sqrt_top_mvp  div_top_U0  //for RTL
          (//Input
           .Clk_CI                 (Clk_CI),
           .Rst_RBI                (Rst_RBI),
           .Div_start_SI           (Div_start_S_S),
           .Sqrt_start_SI          (Sqrt_start_S_S),
           //Input Operands
           .Operand_a_DI           (Operand_a_S_D),
           .Operand_b_DI           (Operand_b_S_D),
           .RM_SI                  (RM_S_S),    //Rounding Mode
           .Precision_ctl_SI       (Precision_ctl_S_S),
           .Format_sel_SI          (Format_sel_S_S),
           .Kill_SI                (Kill_S_S),
           .Result_DO              (Result_D),
           .Fflags_SO              (Fflags_S),
           .Ready_SO               (Ready_S),
           .Done_SO                (Done_S)
         );

        // Input register stage. '0 is used for every reset value so the
        // literal always matches the width of its target.
        always_ff @(posedge Clk_CI, negedge Rst_RBI)
          begin
            if(~Rst_RBI)
              begin
                Div_start_S_S     <= '0;
                Sqrt_start_S_S    <= '0;
                Operand_a_S_D     <= '0;
                Operand_b_S_D     <= '0;
                RM_S_S            <= '0;
                Precision_ctl_S_S <= '0;
                Format_sel_S_S    <= '0;
                Kill_S_S          <= '0;
              end
            else
              begin
                Div_start_S_S     <= Div_start_SI;
                Sqrt_start_S_S    <= Sqrt_start_SI;
                Operand_a_S_D     <= Operand_a_DI;
                Operand_b_S_D     <= Operand_b_DI;
                RM_S_S            <= RM_SI;
                Precision_ctl_S_S <= Precision_ctl_SI;
                Format_sel_S_S    <= Format_sel_SI;
                Kill_S_S          <= Kill_SI;
              end
          end
      end
    else
      begin
        // No input register stage: the core sees the wrapper inputs directly.
        div_sqrt_top_mvp  div_top_U0  //for RTL
          (//Input
           .Clk_CI                 (Clk_CI),
           .Rst_RBI                (Rst_RBI),
           .Div_start_SI           (Div_start_SI),
           .Sqrt_start_SI          (Sqrt_start_SI),
           //Input Operands
           .Operand_a_DI           (Operand_a_DI),
           .Operand_b_DI           (Operand_b_DI),
           .RM_SI                  (RM_SI),    //Rounding Mode
           .Precision_ctl_SI       (Precision_ctl_SI),
           .Format_sel_SI          (Format_sel_SI),
           .Kill_SI                (Kill_SI),
           .Result_DO              (Result_D),
           .Fflags_SO              (Fflags_S),
           .Ready_SO               (Ready_S),
           .Done_SO                (Done_S)
         );
      end
  endgenerate

  /////////////////////////////////////////////////////////////////////////////
  // First Stage of Outputs (always present)
  /////////////////////////////////////////////////////////////////////////////
  logic [C_OP_FP64-1:0]                   Result_dly_S_D;
  logic                                   Ready_dly_S_S;
  logic                                   Done_dly_S_S;
  logic [4:0]                             Fflags_dly_S_S;

  always_ff @(posedge Clk_CI, negedge Rst_RBI)
    begin
      if(~Rst_RBI)
        begin
          Result_dly_S_D <= '0;
          Ready_dly_S_S  <= '0;
          Done_dly_S_S   <= '0;
          Fflags_dly_S_S <= '0;
        end
      else
        begin
          Result_dly_S_D <= Result_D;
          Ready_dly_S_S  <= Ready_S;
          Done_dly_S_S   <= Done_S;
          Fflags_dly_S_S <= Fflags_S;
        end
    end

  /////////////////////////////////////////////////////////////////////////////
  // Second Stage of Outputs (only when PostPipeline_depth_S==2)
  /////////////////////////////////////////////////////////////////////////////
  logic [C_OP_FP64-1:0]                   Result_dly_D_D;
  logic                                   Ready_dly_D_S;
  logic                                   Done_dly_D_S;
  logic [4:0]                             Fflags_dly_D_S;

  generate
    if(PostPipeline_depth_S==2)
      begin
        always_ff @(posedge Clk_CI, negedge Rst_RBI)
          begin
            if(~Rst_RBI)
              begin
                Result_dly_D_D <= '0;
                Ready_dly_D_S  <= '0;
                Done_dly_D_S   <= '0;
                Fflags_dly_D_S <= '0;
              end
            else
              begin
                Result_dly_D_D <= Result_dly_S_D;
                Ready_dly_D_S  <= Ready_dly_S_S;
                Done_dly_D_S   <= Done_dly_S_S;
                Fflags_dly_D_S <= Fflags_dly_S_S;
              end
          end

        assign Result_DO = Result_dly_D_D;
        assign Ready_SO  = Ready_dly_D_S;
        assign Done_SO   = Done_dly_D_S;
        assign Fflags_SO = Fflags_dly_D_S;
      end
    else
      begin
        // Outputs are taken straight from the first output stage.
        assign Result_DO = Result_dly_S_D;
        assign Ready_SO  = Ready_dly_S_S;
        assign Done_SO   = Done_dly_S_S;
        assign Fflags_SO = Fflags_dly_S_S;
      end
  endgenerate

endmodule //

View file

@ -1,180 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li -- lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 03/03/2018 //
// Design Name: div_sqrt_top_mvp //
// Module Name: div_sqrt_top_mvp.sv //
// Project Name: The shared divisor and square root //
// Language: SystemVerilog //
// //
// Description: The top of div and sqrt //
// //
// //
// Revision Date: 12/04/2018 //
// Lei Li //
// To address some requirements by Stefan and add low power //
// control for special cases //
////////////////////////////////////////////////////////////////////////////////
import defs_div_sqrt_mvp::*;
// Top level of the shared divider / square-root datapath. Purely structural:
// it chains
//   preprocess_mvp    (operand unpacking and special-case detection),
//   nrbd_nrsc_mvp     (iterative core producing pre-normalised exp/mantissa),
//   norm_div_sqrt_mvp (normalisation, rounding, result and flag generation).
// Ready_SO from the iterative core is also fed back into preprocess_mvp.
module div_sqrt_top_mvp
  (//Input
   input logic                            Clk_CI,
   input logic                            Rst_RBI,
   input logic                            Div_start_SI,
   input logic                            Sqrt_start_SI,
   //Input Operands
   input logic [C_OP_FP64-1:0]            Operand_a_DI,
   input logic [C_OP_FP64-1:0]            Operand_b_DI,
   // Input Control
   input logic [C_RM-1:0]                 RM_SI,            // Rounding Mode
   input logic [C_PC-1:0]                 Precision_ctl_SI, // Precision Control
   input logic [C_FS-1:0]                 Format_sel_SI,    // Format Selection
   input logic                            Kill_SI,
   //Output Result
   output logic [C_OP_FP64-1:0]           Result_DO,
   //Output-Flags
   output logic [4:0]                     Fflags_SO,
   output logic                           Ready_SO,
   output logic                           Done_SO
 );

  // ---- preprocess_mvp -> nrbd_nrsc_mvp: unpacked operands ----
  logic [C_EXP_FP64:0]                    Exp_a_D;
  logic [C_EXP_FP64:0]                    Exp_b_D;
  logic [C_MANT_FP64:0]                   Mant_a_D;
  logic [C_MANT_FP64:0]                   Mant_b_D;
  logic                                   Start_S;
  logic                                   Special_case_SB,Special_case_dly_SB;

  // ---- preprocess_mvp -> norm_div_sqrt_mvp: sign, rounding mode, operand classes ----
  logic                                   Sign_z_D;
  logic [C_RM-1:0]                        RM_dly_S;
  logic                                   Inf_a_S;
  logic                                   Inf_b_S;
  logic                                   Zero_a_S;
  logic                                   Zero_b_S;
  logic                                   NaN_a_S;
  logic                                   NaN_b_S;
  logic                                   SNaN_S;

  // ---- nrbd_nrsc_mvp -> norm_div_sqrt_mvp: raw result and mode/format flags ----
  logic [C_EXP_FP64+1:0]                  Exp_z_D;
  logic [C_MANT_FP64+4:0]                 Mant_z_D;
  logic                                   Div_enable_S;
  logic                                   Sqrt_enable_S;
  logic                                   Full_precision_S;
  logic                                   FP32_S;
  logic                                   FP64_S;
  logic                                   FP16_S;
  logic                                   FP16ALT_S;

  preprocess_mvp  preprocess_U0
    (
     // clock / reset / handshake
     .Clk_CI                (Clk_CI),
     .Rst_RBI               (Rst_RBI),
     .Div_start_SI          (Div_start_SI),
     .Sqrt_start_SI         (Sqrt_start_SI),
     .Ready_SI              (Ready_SO),
     // raw operands and control
     .Operand_a_DI          (Operand_a_DI),
     .Operand_b_DI          (Operand_b_DI),
     .RM_SI                 (RM_SI),
     .Format_sel_SI         (Format_sel_SI),
     // unpacked operands
     .Start_SO              (Start_S),
     .Exp_a_DO_norm         (Exp_a_D),
     .Mant_a_DO_norm        (Mant_a_D),
     .Exp_b_DO_norm         (Exp_b_D),
     .Mant_b_DO_norm        (Mant_b_D),
     .Sign_z_DO             (Sign_z_D),
     .RM_dly_SO             (RM_dly_S),
     // operand classification
     .Zero_a_SO             (Zero_a_S),
     .Zero_b_SO             (Zero_b_S),
     .Inf_a_SO              (Inf_a_S),
     .Inf_b_SO              (Inf_b_S),
     .NaN_a_SO              (NaN_a_S),
     .NaN_b_SO              (NaN_b_S),
     .SNaN_SO               (SNaN_S),
     .Special_case_SBO      (Special_case_SB),
     .Special_case_dly_SBO  (Special_case_dly_SB)
   );

  nrbd_nrsc_mvp  nrbd_nrsc_U0
    (
     // clock / reset / control
     .Clk_CI                (Clk_CI),
     .Rst_RBI               (Rst_RBI),
     .Div_start_SI          (Div_start_SI),
     .Sqrt_start_SI         (Sqrt_start_SI),
     .Start_SI              (Start_S),
     .Kill_SI               (Kill_SI),
     .Special_case_SBI      (Special_case_SB),
     .Special_case_dly_SBI  (Special_case_dly_SB),
     .Precision_ctl_SI      (Precision_ctl_SI),
     .Format_sel_SI         (Format_sel_SI),
     // unpacked operands
     .Mant_a_DI             (Mant_a_D),
     .Mant_b_DI             (Mant_b_D),
     .Exp_a_DI              (Exp_a_D),
     .Exp_b_DI              (Exp_b_D),
     // mode / format indicators
     .Div_enable_SO         (Div_enable_S),
     .Sqrt_enable_SO        (Sqrt_enable_S),
     .Full_precision_SO     (Full_precision_S),
     .FP64_SO               (FP64_S),
     .FP32_SO               (FP32_S),
     .FP16_SO               (FP16_S),
     .FP16ALT_SO            (FP16ALT_S),
     // handshake and pre-normalised result
     .Ready_SO              (Ready_SO),
     .Done_SO               (Done_SO),
     .Mant_z_DO             (Mant_z_D),
     .Exp_z_DO              (Exp_z_D)
   );

  norm_div_sqrt_mvp  fpu_norm_U0
    (
     // pre-normalised result
     .Mant_in_DI            (Mant_z_D),
     .Exp_in_DI             (Exp_z_D),
     .Sign_in_DI            (Sign_z_D),
     // operation and rounding
     .Div_enable_SI         (Div_enable_S),
     .Sqrt_enable_SI        (Sqrt_enable_S),
     .RM_SI                 (RM_dly_S),
     // operand classification
     .Zero_a_SI             (Zero_a_S),
     .Zero_b_SI             (Zero_b_S),
     .Inf_a_SI              (Inf_a_S),
     .Inf_b_SI              (Inf_b_S),
     .NaN_a_SI              (NaN_a_S),
     .NaN_b_SI              (NaN_b_S),
     .SNaN_SI               (SNaN_S),
     // precision / format
     .Full_precision_SI     (Full_precision_S),
     .FP64_SI               (FP64_S),
     .FP32_SI               (FP32_S),
     .FP16_SI               (FP16_S),
     .FP16ALT_SI            (FP16ALT_S),
     // packed result and status
     .Result_DO             (Result_DO),
     .Fflags_SO             (Fflags_SO)   //{NV,DZ,OF,UF,NX}
   );

endmodule

View file

@ -1,61 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 12/01/2017 //
// Design Name: FPU //
// Module Name: iteration_div_sqrt_mvp //
// Project Name: Private FPU //
// Language: SystemVerilog //
// //
// Description: iteration unit for div and sqrt //
// //
// //
// Revision: 03/14/2018 //
// For div_sqrt_mvp //
////////////////////////////////////////////////////////////////////////////////
// One iteration cell of the div/sqrt datapath: a WIDTH-bit adder with a
// mode-dependent carry-in, plus a small update of the 2-bit D field.
//
//   {Carry_out_DO, Sum_DO} = A_DI + B_DI + carry-in
//   carry-in = 0                                  when Div_enable_SI is set
//            = Sqrt_enable_SI & (D_DI != 0)       otherwise
//   D_DO     = { D_DI[1] xnor D_DI[0], ~D_DI[0] }
//
// Div_start_dly_SI is part of the port list but is not read here.
module iteration_div_sqrt_mvp
#(
   parameter   WIDTH=25
)
  (//Input
   input logic [WIDTH-1:0]      A_DI,
   input logic [WIDTH-1:0]      B_DI,
   input logic                  Div_enable_SI,
   input logic                  Div_start_dly_SI,
   input logic                  Sqrt_enable_SI,
   input logic [1:0]            D_DI,

   output logic [1:0]           D_DO,
   output logic [WIDTH-1:0]     Sum_DO,
   output logic                 Carry_out_DO
   );

  logic                         D_carry_D;
  logic                         Sqrt_cin_D;
  logic                         Cin_D;

  // Next value of the D field, written as a single vector.
  assign D_DO = { ~(D_DI[1] ^ D_DI[0]), ~D_DI[0] };

  // Carry-in is only ever non-zero in square-root mode with a non-zero D.
  assign D_carry_D  = |D_DI;
  assign Sqrt_cin_D = Sqrt_enable_SI && D_carry_D;
  assign Cin_D      = Div_enable_SI ? 1'b0 : Sqrt_cin_D;

  // The (WIDTH+1)-bit left-hand side keeps the carry out of the addition.
  assign {Carry_out_DO, Sum_DO} = A_DI + B_DI + Cin_D;

endmodule

View file

@ -1,470 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 09/03/2018 //
// Design Name: FPU //
// Module Name: norm_div_sqrt_mvp.sv //
// Project Name: //
// Language: SystemVerilog //
// //
// Description: Floating point Normalizer/Rounding unit //
// Since this module is designed as combinational logic, //
// arbitrary register stages can be added in the wrapper module //
// to meet different frequency targets. //
// //
// //
// //
// Revision Date: 12/04/2018 //
// Lei Li //
// To address some requirements by Stefan //
// //
// //
// //
// //
// //
// //
////////////////////////////////////////////////////////////////////////////////
import defs_div_sqrt_mvp::*;
// Combinational normaliser / rounder for the div/sqrt unit.
//
// Takes the pre-normalised mantissa, exponent and sign of the result together
// with the operand classification (zero / inf / NaN), then:
//   1. selects the special-case or normalised exponent and mantissa,
//   2. rounds according to RM_SI when Full_precision_SI is set,
//   3. packs the value into the selected format; narrower formats have their
//      upper bits filled with ones (NaN boxing),
//   4. assembles the status flags Fflags_SO = {NV, DZ, OF, UF, NX}.
//
// Flag behaviour for exact special cases follows IEEE 754:
//   inf / x, sqrt(+inf)  -> infinity, no flags
//   x / inf              -> zero,     no flags
//   0 / 0                -> NaN,      NV only (DZ is not raised)
//   x / 0 (x finite, non-zero) -> infinity, DZ
//
// NOTE(review): two further deviations from IEEE 754 are visible below but are
// left unchanged because they interact with the rounding path:
//   - Exp_UF_S is raised for every subnormal result, even an exact one;
//   - on exponent overflow the result is infinity for every rounding mode and
//     In_Exact_S is not forced high.
module norm_div_sqrt_mvp
  (//Inputs
   input logic [C_MANT_FP64+4:0]                Mant_in_DI,  // Include the needed 4-bit for rounding and hidden bit
   input logic signed [C_EXP_FP64+1:0]          Exp_in_DI,
   input logic                                  Sign_in_DI,
   input logic                                  Div_enable_SI,
   input logic                                  Sqrt_enable_SI,
   input logic                                  Inf_a_SI,
   input logic                                  Inf_b_SI,
   input logic                                  Zero_a_SI,
   input logic                                  Zero_b_SI,
   input logic                                  NaN_a_SI,
   input logic                                  NaN_b_SI,
   input logic                                  SNaN_SI,
   input logic [C_RM-1:0]                       RM_SI,
   input logic                                  Full_precision_SI,
   input logic                                  FP32_SI,
   input logic                                  FP64_SI,
   input logic                                  FP16_SI,
   input logic                                  FP16ALT_SI,
   //Outputs
   output logic [C_EXP_FP64+C_MANT_FP64:0]      Result_DO,
   output logic [4:0]                           Fflags_SO    //{NV,DZ,OF,UF,NX}
   );

  logic                                         Sign_res_D;

  logic                                         NV_OP_S;
  logic                                         Exp_OF_S;
  logic                                         Exp_UF_S;
  logic                                         Div_Zero_S;
  logic                                         In_Exact_S;

  /////////////////////////////////////////////////////////////////////////////
  // Normalization                                                           //
  /////////////////////////////////////////////////////////////////////////////
  logic [C_MANT_FP64:0]                         Mant_res_norm_D;
  logic [C_EXP_FP64-1:0]                        Exp_res_norm_D;

  /////////////////////////////////////////////////////////////////////////////
  // Right shift operations for negative exponents                           //
  /////////////////////////////////////////////////////////////////////////////

  // These four sums are not referenced anywhere else in this module; they are
  // kept so the set of internal nets is unchanged.
  logic [C_EXP_FP64+1:0]                        Exp_Max_RS_FP64_D;
  logic [C_EXP_FP32+1:0]                        Exp_Max_RS_FP32_D;
  logic [C_EXP_FP16+1:0]                        Exp_Max_RS_FP16_D;
  logic [C_EXP_FP16ALT+1:0]                     Exp_Max_RS_FP16ALT_D;
  //
  assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative
  assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative
  assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative
  assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative

  // Right-shift amount used when Exp_in_DI is negative: (-Exp_in_DI) + 1.
  logic [C_EXP_FP64+1:0]                        Num_RS_D;
  assign Num_RS_D=~Exp_in_DI+1+1;

  // Shifted mantissa; the bits that fall off the bottom are captured in
  // Mant_forsticky_D so they can still take part in rounding.
  logic [C_MANT_FP64:0]                         Mant_RS_D;
  logic [C_MANT_FP64+4:0]                       Mant_forsticky_D;
  assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D);
  //
  logic [C_EXP_FP64+1:0]                        Exp_subOne_D;
  assign Exp_subOne_D = Exp_in_DI -1;

  //normalization
  logic [1:0]                                   Mant_lower_D;
  logic                                         Mant_sticky_bit_D;
  logic [C_MANT_FP64+4:0]                       Mant_forround_D;

  always_comb
    begin
      // Defaults describe an exact zero carrying the incoming sign with no
      // flag raised. Every branch below overrides only what differs.
      Div_Zero_S      = 1'b0;
      Exp_OF_S        = 1'b0;
      Exp_UF_S        = 1'b0;
      NV_OP_S         = 1'b0;
      Sign_res_D      = Sign_in_DI;
      Exp_res_norm_D  = '0;
      Mant_res_norm_D = '0;
      Mant_forround_D = '0;

      if(NaN_a_SI || NaN_b_SI)  // a NaN operand gives a NaN result; NV only for a signalling NaN
        begin
          Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
          Exp_res_norm_D  = '1;
          Sign_res_D      = 1'b0;
          NV_OP_S         = SNaN_SI;
        end

      else if(Inf_a_SI)
        begin
          if((Div_enable_SI&&Inf_b_SI) || (Sqrt_enable_SI&&Sign_in_DI))  // inf/inf or sqrt(-inf): invalid, return NaN
            begin
              Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
              Exp_res_norm_D  = '1;
              Sign_res_D      = 1'b0;
              NV_OP_S         = 1'b1;
            end
          else  // inf/x or sqrt(+inf): exact infinity, so no overflow flag
            begin
              Exp_res_norm_D  = '1;
            end
        end

      else if(Div_enable_SI&&Inf_b_SI)  // x/inf: exact signed zero, no flags
        begin
          // the defaults already describe this result
        end

      else if(Zero_a_SI)
        begin
          if(Div_enable_SI&&Zero_b_SI)  // 0/0: invalid operation, not a division by zero
            begin
              Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
              Exp_res_norm_D  = '1;
              Sign_res_D      = 1'b0;
              NV_OP_S         = 1'b1;
            end
          // otherwise 0/x or sqrt(+-0): signed zero, covered by the defaults
        end

      else if(Div_enable_SI&&(Zero_b_SI))  // x/0 with finite non-zero x: signed infinity, DZ
        begin
          Div_Zero_S      = 1'b1;
          Exp_res_norm_D  = '1;
        end

      else if(Sign_in_DI&&Sqrt_enable_SI)  // sqrt of a negative number: invalid
        begin
          Mant_res_norm_D = {1'b0,C_MANT_NAN_FP64};
          Exp_res_norm_D  = '1;
          Sign_res_D      = 1'b0;
          NV_OP_S         = 1'b1;
        end

      else if((Exp_in_DI[C_EXP_FP64:0]=='0))
        begin
          if(Mant_in_DI!='0)  // Exp=0, Mant!=0: denormal, mantissa shifted right by one
            begin
              Exp_UF_S        = 1'b1;
              Mant_res_norm_D = {1'b0,Mant_in_DI[C_MANT_FP64+4:5]};
              Mant_forround_D = {Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
            end
          // otherwise the value is zero, covered by the defaults
        end

      else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4]))  // exponent 1 without leading one: denormal
        begin
          Exp_UF_S        = 1'b1;
          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+4:4];
          Mant_forround_D = {Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
        end

      else if(Exp_in_DI[C_EXP_FP64+1])  // negative exponent: use the right-shifted mantissa
        begin
          Exp_UF_S        = 1'b1;
          Mant_res_norm_D = Mant_RS_D;
          Mant_forround_D = Mant_forsticky_D;
        end

      else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) )  // exponent beyond the format range: overflow
        begin
          Exp_OF_S        = 1'b1;
          Exp_res_norm_D  = '1;
        end

      else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )  // exponent field all ones for the format
        begin
          if(~Mant_in_DI[C_MANT_FP64+4])  // MSB=0: one normalisation shift brings the exponent back in range
            begin
              Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+3:3];
              Exp_res_norm_D  = Exp_subOne_D[C_EXP_FP64-1:0];
              Mant_forround_D = {Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
            end
          else  // MSB=1: the value does not fit, overflow to infinity
            begin
              Exp_OF_S        = 1'b1;
              Exp_res_norm_D  = '1;
            end
        end

      else if(Mant_in_DI[C_MANT_FP64+4])  // normal numbers with 1.XXX
        begin
          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+4:4];
          Exp_res_norm_D  = Exp_in_DI[C_EXP_FP64-1:0];
          Mant_forround_D = {Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
        end

      else  // normal numbers with 0.1XX: shift left by one, exponent minus one
        begin
          Mant_res_norm_D = Mant_in_DI[C_MANT_FP64+3:3];
          Exp_res_norm_D  = Exp_subOne_D[C_EXP_FP64-1:0];
          Mant_forround_D = {Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
        end
    end

  /////////////////////////////////////////////////////////////////////////////
  // Rounding enable only for full precision (Full_precision_SI==1'b1)       //
  /////////////////////////////////////////////////////////////////////////////

  logic [C_MANT_FP64:0]                         Mant_upper_D;
  logic [C_MANT_FP64+1:0]                       Mant_upperRounded_D;
  logic                                         Mant_roundUp_S;
  logic                                         Mant_rounded_S;

  // Split the normalised mantissa into the bits kept by the selected format
  // (Mant_upper_D), the two bits just below them (Mant_lower_D) and an OR of
  // the remaining bits (Mant_sticky_bit_D).
  always_comb
    begin
      if(FP32_SI)
        begin
          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
        end
      else if(FP64_SI)
        begin
          // FP64 keeps the whole mantissa, so guard and sticky bits come from
          // Mant_forround_D instead.
          Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
          Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
          Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0];
        end
      else if(FP16_SI)
        begin
          // NOTE(review): the sticky OR stops at bit 30 here and for FP16ALT,
          // whereas FP32 goes down to bit 0; presumably the lower bits are
          // never produced for these formats - confirm against control_mvp.
          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30];
        end
      else  //FP16ALT
        begin
          Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
          Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
          Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30];
        end
    end

  // High when any discarded bit is set, i.e. the rounded result is inexact.
  assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D;

  always_comb  //determine whether to round up or not
    begin
      Mant_roundUp_S = 1'b0;
      case (RM_SI)
        C_RM_NEAREST :
          // Round up above the halfway point, or at exactly halfway when the
          // kept LSB of the selected format is 1 (ties to even).
          Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
        C_RM_TRUNC   :
          Mant_roundUp_S = 0;
        C_RM_PLUSINF :
          Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
        C_RM_MINUSINF:
          Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
        default      :
          Mant_roundUp_S = 0;
      endcase // case (RM_SI)
    end // always_comb begin

  logic                                         Mant_renorm_S;
  // One-hot increment placed at the LSB position of the selected format:
  // bit 45 for FP16ALT, 42 for FP16, 29 for FP32, 0 for FP64.
  logic [C_MANT_FP64:0]                         Mant_roundUp_Vector_S;

  assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};

  assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
  // A carry out of the mantissa means the rounded value needs renormalising.
  assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1];

  /////////////////////////////////////////////////////////////////////////////
  // Renormalization for Rounding                                            //
  /////////////////////////////////////////////////////////////////////////////
  logic [C_MANT_FP64-1:0]                       Mant_res_round_D;
  logic [C_EXP_FP64-1:0]                        Exp_res_round_D;

  assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
  assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S;

  /////////////////////////////////////////////////////////////////////////////
  // Output Assignments                                                      //
  /////////////////////////////////////////////////////////////////////////////
  logic [C_MANT_FP64-1:0]                       Mant_before_format_ctl_D;
  logic [C_EXP_FP64-1:0]                        Exp_before_format_ctl_D;

  // Without full precision the unrounded values are used; the hidden bit of
  // Mant_res_norm_D is dropped by the narrower target.
  assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D;
  assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;

  always_comb  //NaN Boxing
    begin
      if(FP32_SI)
        begin
          Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
        end
      else if(FP64_SI)
        begin
          Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
        end
      else if(FP16_SI)
        begin
          Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
        end
      else
        begin
          Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
        end
    end

  // Inexact whenever rounding is disabled or any discarded bit was set.
  assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S;
  assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}

endmodule // norm_div_sqrt_mvp

View file

@ -1,104 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 10/04/2018 //
// Design Name: FPU //
// Module Name: nrbd_nrsc_mvp.sv //
// Project Name: Private FPU //
// Language: SystemVerilog //
// //
// Description: non-restoring binary divider / square root //
// //
// Revision Date: 12/04/2018 //
// Lei Li //
// To address some requirements by Stefan and add low power //
// control for special cases //
// //
////////////////////////////////////////////////////////////////////////////////
import defs_div_sqrt_mvp::*;
// Thin shell around control_mvp. Every port is forwarded unchanged to the
// single control_mvp instance; the mantissa/exponent of operand a become its
// numerator inputs and those of operand b its denominator inputs. The two
// delayed start strobes produced by control_mvp are not exported.
module nrbd_nrsc_mvp
  (//Input
   input logic                                 Clk_CI,
   input logic                                 Rst_RBI,
   input logic                                 Div_start_SI,
   input logic                                 Sqrt_start_SI,
   input logic                                 Start_SI,
   input logic                                 Kill_SI,
   input logic                                 Special_case_SBI,
   input logic                                 Special_case_dly_SBI,
   input logic [C_PC-1:0]                      Precision_ctl_SI,
   input logic [1:0]                           Format_sel_SI,
   input logic [C_MANT_FP64:0]                 Mant_a_DI,
   input logic [C_MANT_FP64:0]                 Mant_b_DI,
   input logic [C_EXP_FP64:0]                  Exp_a_DI,
   input logic [C_EXP_FP64:0]                  Exp_b_DI,
   //output
   output logic                                Div_enable_SO,
   output logic                                Sqrt_enable_SO,

   output logic                                Full_precision_SO,
   output logic                                FP32_SO,
   output logic                                FP64_SO,
   output logic                                FP16_SO,
   output logic                                FP16ALT_SO,
   output logic                                Ready_SO,
   output logic                                Done_SO,
   output logic [C_MANT_FP64+4:0]              Mant_z_DO,
   output logic [C_EXP_FP64+1:0]               Exp_z_DO
   );

  // Driven by control_mvp but left unconnected beyond this module.
  logic                                        Div_start_dly_S;
  logic                                        Sqrt_start_dly_S;

  control_mvp  control_U0
    (
     // clock / reset
     .Clk_CI                   (Clk_CI),
     .Rst_RBI                  (Rst_RBI),
     // start / kill and special-case control
     .Div_start_SI             (Div_start_SI),
     .Sqrt_start_SI            (Sqrt_start_SI),
     .Start_SI                 (Start_SI),
     .Kill_SI                  (Kill_SI),
     .Special_case_SBI         (Special_case_SBI),
     .Special_case_dly_SBI     (Special_case_dly_SBI),
     .Precision_ctl_SI         (Precision_ctl_SI),
     .Format_sel_SI            (Format_sel_SI),
     // operands: a -> numerator, b -> denominator
     .Numerator_DI             (Mant_a_DI),
     .Denominator_DI           (Mant_b_DI),
     .Exp_num_DI               (Exp_a_DI),
     .Exp_den_DI               (Exp_b_DI),
     // delayed start strobes (local only)
     .Div_start_dly_SO         (Div_start_dly_S),
     .Sqrt_start_dly_SO        (Sqrt_start_dly_S),
     // mode / format indicators
     .Div_enable_SO            (Div_enable_SO),
     .Sqrt_enable_SO           (Sqrt_enable_SO),
     .Full_precision_SO        (Full_precision_SO),
     .FP64_SO                  (FP64_SO),
     .FP32_SO                  (FP32_SO),
     .FP16_SO                  (FP16_SO),
     .FP16ALT_SO               (FP16ALT_SO),
     // handshake and pre-normalised result
     .Ready_SO                 (Ready_SO),
     .Done_SO                  (Done_SO),
     .Mant_result_prenorm_DO   (Mant_z_DO),
     .Exp_result_prenorm_DO    (Exp_z_DO)
   );

endmodule

View file

@ -1,425 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// Company: IIS @ ETHZ - Federal Institute of Technology //
// //
// Engineers: Lei Li //lile@iis.ee.ethz.ch //
// //
// Additional contributions by: //
// //
// //
// //
// Create Date: 01/03/2018 //
// Design Name: FPU //
// Module Name: preprocess_mvp.sv //
// Project Name: Private FPU //
// Language: SystemVerilog //
// //
// Description: decode and data preparation //
// //
// Revision Date: 12/04/2018 //
// Lei Li //
// To address some requirements by Stefan and add low power //
// control for special cases //
// //
// //
////////////////////////////////////////////////////////////////////////////////
import defs_div_sqrt_mvp::*;
// preprocess_mvp
//
// Operand decode and data preparation for the divide / square-root unit.
//
// On every cycle in which a start request (Div_start_SI or Sqrt_start_SI) is
// seen while Ready_SI is high, the module:
//   * splits both operands into sign / exponent / mantissa according to
//     Format_sel_SI, widening the fields to the FP64 field sizes,
//   * classifies each operand as zero / infinity / NaN and detects a
//     signalling NaN on either operand,
//   * left-shifts each mantissa by the count returned by the lzc instance and
//     corrects the exponent accordingly,
//   * captures the result sign and the rounding mode.
// All captured values are held in registers until the next accepted request.
// The registers use an asynchronous reset that is active when Rst_RBI is low.
//
// Format_sel_SI encoding, as used by the case statement below:
//   2'b00 -> FP32, 2'b01 -> FP64, 2'b10 -> FP16, 2'b11 -> FP16ALT
module preprocess_mvp
  (
   input  logic                   Clk_CI,
   input  logic                   Rst_RBI,
   input  logic                   Div_start_SI,
   input  logic                   Sqrt_start_SI,
   input  logic                   Ready_SI,
   // Input operands
   input  logic [C_OP_FP64-1:0]   Operand_a_DI,
   input  logic [C_OP_FP64-1:0]   Operand_b_DI,
   input  logic [C_RM-1:0]        RM_SI,          // rounding mode
   input  logic [C_FS-1:0]        Format_sel_SI,  // format selection
   // To control
   output logic                   Start_SO,
   output logic [C_EXP_FP64:0]    Exp_a_DO_norm,
   output logic [C_EXP_FP64:0]    Exp_b_DO_norm,
   output logic [C_MANT_FP64:0]   Mant_a_DO_norm,
   output logic [C_MANT_FP64:0]   Mant_b_DO_norm,
   output logic [C_RM-1:0]        RM_dly_SO,
   output logic                   Sign_z_DO,
   output logic                   Inf_a_SO,
   output logic                   Inf_b_SO,
   output logic                   Zero_a_SO,
   output logic                   Zero_b_SO,
   output logic                   NaN_a_SO,
   output logic                   NaN_b_SO,
   output logic                   SNaN_SO,
   output logic                   Special_case_SBO,
   output logic                   Special_case_dly_SBO
   );

  /////////////////////////////////////////////////////////////////////////////
  // Internal signals                                                        //
  /////////////////////////////////////////////////////////////////////////////

  // Request / accept strobes
  logic                   Start_S;  // divide or square root requested
  logic                   Load_S;   // request present while Ready_SI is high

  // Unpacked operand fields (FP64-sized)
  logic                   Sign_a_D, Sign_b_D;
  logic [C_EXP_FP64-1:0]  Exp_a_D;
  logic [C_EXP_FP64-1:0]  Exp_b_D;
  logic [C_MANT_FP64-1:0] Mant_a_NonH_D;  // mantissa without hidden bit
  logic [C_MANT_FP64-1:0] Mant_b_NonH_D;
  logic                   Hb_a_D;         // hidden bits
  logic                   Hb_b_D;
  logic [C_MANT_FP64:0]   Mant_a_D;       // mantissa with hidden bit
  logic [C_MANT_FP64:0]   Mant_b_D;

  // Raw (pre-normalisation) classification of the incoming operands
  logic                   Mant_a_prenorm_zero_S;
  logic                   Mant_b_prenorm_zero_S;
  logic                   Exp_a_prenorm_zero_S;
  logic                   Exp_b_prenorm_zero_S;
  logic                   Exp_a_prenorm_Inf_NaN_S;
  logic                   Exp_b_prenorm_Inf_NaN_S;
  logic                   Mant_a_prenorm_QNaN_S;
  logic                   Mant_a_prenorm_SNaN_S;
  logic                   Mant_b_prenorm_QNaN_S;
  logic                   Mant_b_prenorm_SNaN_S;

  // Classification flags: *_SN is the next value, *_SP the registered value
  logic                   Zero_a_SN, Zero_a_SP;
  logic                   Zero_b_SN, Zero_b_SP;
  logic                   Inf_a_SN,  Inf_a_SP;
  logic                   Inf_b_SN,  Inf_b_SP;
  logic                   NaN_a_SN,  NaN_a_SP;
  logic                   NaN_b_SN,  NaN_b_SP;
  logic                   SNaN_SN,   SNaN_SP;

  // Operand conditions that mark an operation as special
  logic                   Div_special_S;
  logic                   Sqrt_special_S;

  // Result sign and rounding mode: next / registered
  logic                   Sign_z_DN;
  logic                   Sign_z_DP;
  logic [C_RM-1:0]        RM_DN;
  logic [C_RM-1:0]        RM_DP;

  // lzc results
  logic [5:0]             Mant_leadingOne_a, Mant_leadingOne_b;
  logic                   Mant_zero_S_a, Mant_zero_S_b;

  // Normalised mantissas and corrected exponents: next / registered
  logic [C_MANT_FP64:0]   Mant_a_norm_DN, Mant_a_norm_DP;
  logic [C_MANT_FP64:0]   Mant_b_norm_DN, Mant_b_norm_DP;
  logic [C_EXP_FP64:0]    Exp_a_norm_DN,  Exp_a_norm_DP;
  logic [C_EXP_FP64:0]    Exp_b_norm_DN,  Exp_b_norm_DP;

  /////////////////////////////////////////////////////////////////////////////
  // Request handling                                                        //
  /////////////////////////////////////////////////////////////////////////////

  assign Start_S = Div_start_SI | Sqrt_start_SI;
  assign Load_S  = Start_S && Ready_SI;

  /////////////////////////////////////////////////////////////////////////////
  // Disassemble operands and run the per-format field checks                //
  /////////////////////////////////////////////////////////////////////////////

  // Narrow formats are placed in the FP64-sized fields by zero-extending the
  // exponent on the left and zero-padding the mantissa on the right. The
  // mantissa-zero and exponent-all-ones comparisons are made on the native
  // field of the selected format.
  always_comb
    begin
      case (Format_sel_SI)
        2'b00:  // FP32
          begin
            Sign_a_D                = Operand_a_DI[C_OP_FP32-1];
            Sign_b_D                = Operand_b_DI[C_OP_FP32-1];
            Exp_a_D                 = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
            Exp_b_D                 = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
            Mant_a_NonH_D           = {Operand_a_DI[C_MANT_FP32-1:0], 29'h0};
            Mant_b_NonH_D           = {Operand_b_DI[C_MANT_FP32-1:0], 29'h0};
            Mant_a_prenorm_zero_S   = (Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
            Mant_b_prenorm_zero_S   = (Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
            Exp_a_prenorm_Inf_NaN_S = (Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
            Exp_b_prenorm_Inf_NaN_S = (Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
          end
        2'b01:  // FP64
          begin
            Sign_a_D                = Operand_a_DI[C_OP_FP64-1];
            Sign_b_D                = Operand_b_DI[C_OP_FP64-1];
            Exp_a_D                 = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
            Exp_b_D                 = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
            Mant_a_NonH_D           = Operand_a_DI[C_MANT_FP64-1:0];
            Mant_b_NonH_D           = Operand_b_DI[C_MANT_FP64-1:0];
            Mant_a_prenorm_zero_S   = (Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
            Mant_b_prenorm_zero_S   = (Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
            Exp_a_prenorm_Inf_NaN_S = (Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
            Exp_b_prenorm_Inf_NaN_S = (Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
          end
        2'b10:  // FP16
          begin
            Sign_a_D                = Operand_a_DI[C_OP_FP16-1];
            Sign_b_D                = Operand_b_DI[C_OP_FP16-1];
            Exp_a_D                 = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
            Exp_b_D                 = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
            Mant_a_NonH_D           = {Operand_a_DI[C_MANT_FP16-1:0], 42'h0};
            Mant_b_NonH_D           = {Operand_b_DI[C_MANT_FP16-1:0], 42'h0};
            Mant_a_prenorm_zero_S   = (Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
            Mant_b_prenorm_zero_S   = (Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
            Exp_a_prenorm_Inf_NaN_S = (Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
            Exp_b_prenorm_Inf_NaN_S = (Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
          end
        2'b11:  // FP16ALT
          begin
            Sign_a_D                = Operand_a_DI[C_OP_FP16ALT-1];
            Sign_b_D                = Operand_b_DI[C_OP_FP16ALT-1];
            Exp_a_D                 = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
            Exp_b_D                 = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
            Mant_a_NonH_D           = {Operand_a_DI[C_MANT_FP16ALT-1:0], 45'h0};
            Mant_b_NonH_D           = {Operand_b_DI[C_MANT_FP16ALT-1:0], 45'h0};
            Mant_a_prenorm_zero_S   = (Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
            Mant_b_prenorm_zero_S   = (Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
            Exp_a_prenorm_Inf_NaN_S = (Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
            Exp_b_prenorm_Inf_NaN_S = (Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
          end
      endcase
    end

  // The hidden bit is set whenever the exponent field is non-zero.
  assign Hb_a_D   = | Exp_a_D;
  assign Hb_b_D   = | Exp_b_D;
  assign Mant_a_D = {Hb_a_D, Mant_a_NonH_D};
  assign Mant_b_D = {Hb_b_D, Mant_b_NonH_D};

  /////////////////////////////////////////////////////////////////////////////
  // Preliminary checks for infinite/zero/NaN operands                       //
  /////////////////////////////////////////////////////////////////////////////

  assign Exp_a_prenorm_zero_S = ~Hb_a_D;
  assign Exp_b_prenorm_zero_S = ~Hb_b_D;

  // QNaN pattern: top mantissa bit set and every lower bit clear.
  // SNaN pattern: top mantissa bit clear and at least one lower bit set.
  // Only the SNaN patterns feed further logic in this module.
  assign Mant_a_prenorm_QNaN_S = Mant_a_NonH_D[C_MANT_FP64-1] && (~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
  assign Mant_a_prenorm_SNaN_S = (~Mant_a_NonH_D[C_MANT_FP64-1]) && ((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
  assign Mant_b_prenorm_QNaN_S = Mant_b_NonH_D[C_MANT_FP64-1] && (~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
  assign Mant_b_prenorm_SNaN_S = (~Mant_b_NonH_D[C_MANT_FP64-1]) && ((|Mant_b_NonH_D[C_MANT_FP64-2:0]));

  // Each flag takes a fresh value when a request is accepted and otherwise
  // keeps its registered value.
  assign Zero_a_SN = Load_S ? (Exp_a_prenorm_zero_S && Mant_a_prenorm_zero_S)       : Zero_a_SP;
  assign Zero_b_SN = Load_S ? (Exp_b_prenorm_zero_S && Mant_b_prenorm_zero_S)       : Zero_b_SP;
  assign Inf_a_SN  = Load_S ? (Exp_a_prenorm_Inf_NaN_S && Mant_a_prenorm_zero_S)    : Inf_a_SP;
  assign Inf_b_SN  = Load_S ? (Exp_b_prenorm_Inf_NaN_S && Mant_b_prenorm_zero_S)    : Inf_b_SP;
  assign NaN_a_SN  = Load_S ? (Exp_a_prenorm_Inf_NaN_S && (~Mant_a_prenorm_zero_S)) : NaN_a_SP;
  assign NaN_b_SN  = Load_S ? (Exp_b_prenorm_Inf_NaN_S && (~Mant_b_prenorm_zero_S)) : NaN_b_SP;
  assign SNaN_SN   = Load_S ? ((Mant_a_prenorm_SNaN_S && NaN_a_SN) | (Mant_b_prenorm_SNaN_S && NaN_b_SN))
                            : SNaN_SP;

  /////////////////////////////////////////////////////////////////////////////
  // Low power control                                                       //
  /////////////////////////////////////////////////////////////////////////////

  // A divide is special when either operand is zero, infinite or NaN.
  // A square root is special when operand a is zero, infinite, NaN or has its
  // sign bit set.
  assign Div_special_S  = Zero_a_SN | Zero_b_SN | Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN;
  assign Sqrt_special_S = Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D;

  // High only in the accept cycle of an operation that is NOT special.
  assign Special_case_SBO = (~(Div_start_SI ? Div_special_S : Sqrt_special_S)) && Load_S;

  /////////////////////////////////////////////////////////////////////////////
  // Result sign and rounding mode                                           //
  /////////////////////////////////////////////////////////////////////////////

  // Divide: XOR of both operand signs. Square root: sign of operand a.
  always_comb
    begin
      if (Div_start_SI && Ready_SI)
        Sign_z_DN = Sign_a_D ^ Sign_b_D;
      else if (Sqrt_start_SI && Ready_SI)
        Sign_z_DN = Sign_a_D;
      else
        Sign_z_DN = Sign_z_DP;
    end

  always_comb
    begin
      if (Load_S)
        RM_DN = RM_SI;
      else
        RM_DN = RM_DP;
    end

  /////////////////////////////////////////////////////////////////////////////
  // Mantissa normalisation and exponent correction                          //
  /////////////////////////////////////////////////////////////////////////////

  // NOTE(review): cnt_o is used below as a left-shift amount, so MODE = 1 is
  // presumably the leading-zero count of in_i -- confirm against the lzc
  // module. empty_o is connected but not used by this module.
  lzc #(
    .WIDTH ( C_MANT_FP64+1 ),
    .MODE  ( 1             )
  ) LOD_Ua (
    .in_i    ( Mant_a_D          ),
    .cnt_o   ( Mant_leadingOne_a ),
    .empty_o ( Mant_zero_S_a     )
  );

  lzc #(
    .WIDTH ( C_MANT_FP64+1 ),
    .MODE  ( 1             )
  ) LOD_Ub (
    .in_i    ( Mant_b_D          ),
    .cnt_o   ( Mant_leadingOne_b ),
    .empty_o ( Mant_zero_S_b     )
  );

  assign Mant_a_norm_DN = Load_S ? (Mant_a_D << (Mant_leadingOne_a)) : Mant_a_norm_DP;
  assign Mant_b_norm_DN = Load_S ? (Mant_b_D << (Mant_leadingOne_b)) : Mant_b_norm_DP;

  // Covering the process of denormal numbers: the exponent is reduced by the
  // shift amount, and one is added back whenever a shift actually took place.
  assign Exp_a_norm_DN = Load_S ? (Exp_a_D - Mant_leadingOne_a + (|Mant_leadingOne_a)) : Exp_a_norm_DP;
  assign Exp_b_norm_DN = Load_S ? (Exp_b_D - Mant_leadingOne_b + (|Mant_leadingOne_b)) : Exp_b_norm_DP;

  /////////////////////////////////////////////////////////////////////////////
  // State registers                                                         //
  /////////////////////////////////////////////////////////////////////////////

  always_ff @(posedge Clk_CI, negedge Rst_RBI)
    begin
      if (~Rst_RBI)
        begin
          Zero_a_SP            <= '0;
          Zero_b_SP            <= '0;
          Inf_a_SP             <= '0;
          Inf_b_SP             <= '0;
          NaN_a_SP             <= '0;
          NaN_b_SP             <= '0;
          SNaN_SP              <= '0;
          Special_case_dly_SBO <= '0;
          Sign_z_DP            <= '0;
          RM_DP                <= '0;
          Mant_a_norm_DP       <= '0;
          Mant_b_norm_DP       <= '0;
          Exp_a_norm_DP        <= '0;
          Exp_b_norm_DP        <= '0;
        end
      else
        begin
          // Operand classification
          Zero_a_SP      <= Zero_a_SN;
          Zero_b_SP      <= Zero_b_SN;
          Inf_a_SP       <= Inf_a_SN;
          Inf_b_SP       <= Inf_b_SN;
          NaN_a_SP       <= NaN_a_SN;
          NaN_b_SP       <= NaN_b_SN;
          SNaN_SP        <= SNaN_SN;

          // Sign and rounding mode
          Sign_z_DP      <= Sign_z_DN;
          RM_DP          <= RM_DN;

          // Normalised operands
          Mant_a_norm_DP <= Mant_a_norm_DN;
          Mant_b_norm_DP <= Mant_b_norm_DN;
          Exp_a_norm_DP  <= Exp_a_norm_DN;
          Exp_b_norm_DP  <= Exp_b_norm_DN;

          // Special_case_SBO is captured on an accepted request; in every
          // other cycle the register keeps the value it already has.
          if (Load_S)
            begin
              Special_case_dly_SBO <= Special_case_SBO;
            end
          else if (Special_case_dly_SBO)
            begin
              Special_case_dly_SBO <= 1'b1;
            end
          else
            begin
              Special_case_dly_SBO <= '0;
            end
        end
    end

  /////////////////////////////////////////////////////////////////////////////
  // Output assignments                                                      //
  /////////////////////////////////////////////////////////////////////////////

  assign Start_SO       = Start_S;
  assign RM_dly_SO      = RM_DP;
  assign Sign_z_DO      = Sign_z_DP;

  assign Exp_a_DO_norm  = Exp_a_norm_DP;
  assign Exp_b_DO_norm  = Exp_b_norm_DP;
  assign Mant_a_DO_norm = Mant_a_norm_DP;
  assign Mant_b_DO_norm = Mant_b_norm_DP;

  assign Zero_a_SO      = Zero_a_SP;
  assign Zero_b_SO      = Zero_b_SP;
  assign Inf_a_SO       = Inf_a_SP;
  assign Inf_b_SO       = Inf_b_SP;
  assign NaN_a_SO       = NaN_a_SP;
  assign NaN_b_SO       = NaN_b_SP;
  assign SNaN_SO        = SNaN_SP;

endmodule

View file

@ -1,14 +0,0 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// This file is generated by the util/vendor script. Please do not modify it
// manually.
{
upstream:
{
url: https://github.com/openhwgroup/cvfpu
rev: 3116391bf66660f806b45e212b9949c528b4e270
}
}

View file

@ -1,34 +0,0 @@
// -*- coding: utf-8 -*-
// Copyright (C) 2022 Thales DIS France SAS
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0.
// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
{
// Name of the project
name: "openhwgroup_cvfpu",
// Target directory: relative to the location of this script.
target_dir: "openhwgroup/cvfpu",
// Upstream repository
upstream: {
// URL
url: "https://github.com/openhwgroup/cvfpu",
// revision
rev: "v0.7.0",
}
// Patch dir for local changes
patch_dir: "patches/openhwgroup/cvfpu",
// Exclusions from upstream content
exclude_from_upstream: [
".gitmodules",
"Bender.yml",
"docs",
"ips_list.yml",
"src_files.yml",
"tb",
]
}

View file

@ -1,14 +0,0 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// This file is generated by the util/vendor script. Please do not modify it
// manually.
{
upstream:
{
url: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
rev: 86e1f558b3c95e91577c41b2fc452c86b04e85ac
}
}

View file

@ -1,32 +0,0 @@
// -*- coding: utf-8 -*-
// Copyright (C) 2022 Thales DIS France SAS
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0.
// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
{
// Name of the project
name: "pulp_fpu_div_sqrt_mvp",
// Target directory: relative to the location of this script.
target_dir: "openhwgroup/cvfpu/src/fpu_div_sqrt_mvp",
// Upstream repository
upstream: {
// URL
url: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git",
// revision
rev: "v1.0.4",
}
// Patch dir for local changes
patch_dir: "patches/pulp-platform/fpu_div_sqrt_mvp",
// Exclusions from upstream content
exclude_from_upstream: [
".git",
"Bender.yml",
"document",
"src_files.yml",
]
}