mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-19 03:44:46 -04:00
Cvfpu from vendor to submodule (#2070)
This commit is contained in:
parent
3515908315
commit
3ecabdb95a
34 changed files with 23 additions and 10589 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -37,6 +37,9 @@
|
|||
[submodule "verif/core-v-verif"]
|
||||
path = verif/core-v-verif
|
||||
url = https://github.com/openhwgroup/core-v-verif
|
||||
[submodule "core/cvfpu"]
|
||||
path = core/cvfpu
|
||||
url = https://github.com/openhwgroup/cvfpu.git
|
||||
[submodule "core/cache_subsystem/hpdcache"]
|
||||
path = core/cache_subsystem/hpdcache
|
||||
url = https://github.com/openhwgroup/cv-hpdcache.git
|
||||
|
|
|
@ -37,25 +37,25 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv
|
|||
+incdir+${CVA6_REPO_DIR}/common/local/util/
|
||||
|
||||
// Floating point unit
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_top.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_pkg.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_cast_multi.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_classifier.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_divsqrt_multi.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_fma_multi.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_fma.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_noncomp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_block.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_fmt_slice.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_opgroup_multifmt_slice.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_rounding.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpnew_top.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
|
||||
${CVA6_REPO_DIR}/core/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
|
||||
|
||||
${CVA6_REPO_DIR}/core/include/config_pkg.sv
|
||||
${CVA6_REPO_DIR}/core/include/${TARGET_CFG}_config_pkg.sv
|
||||
|
|
1
core/cvfpu
Submodule
1
core/cvfpu
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 3116391bf66660f806b45e212b9949c528b4e270
|
3
vendor/openhwgroup/cvfpu/.gitignore
vendored
3
vendor/openhwgroup/cvfpu/.gitignore
vendored
|
@ -1,3 +0,0 @@
|
|||
*~
|
||||
html
|
||||
Bender.lock
|
33
vendor/openhwgroup/cvfpu/CITATION.cff
vendored
33
vendor/openhwgroup/cvfpu/CITATION.cff
vendored
|
@ -1,33 +0,0 @@
|
|||
cff-version: 1.2.0
|
||||
message: "If you use FPnew, please cite it as below."
|
||||
authors:
|
||||
- family-names: "Mach"
|
||||
given-names: "Stefan"
|
||||
orcid: "https://orcid.org/0000-0002-3476-8857"
|
||||
title: "FPnew: - New Floating-Point Unit with Transprecision Capabilities"
|
||||
version: 0.6.6
|
||||
url: "https://github.com/pulp-platform/fpnew"
|
||||
preferred-citation:
|
||||
type: article
|
||||
authors:
|
||||
- family-names: "Mach"
|
||||
given-names: "Stefan"
|
||||
orcid: "https://orcid.org/0000-0002-3476-8857"
|
||||
- family-names: "Schuiki"
|
||||
given-names: "Fabian"
|
||||
orcid: "https://orcid.org/0000-0002-9923-5031"
|
||||
- family-names: "Zaruba"
|
||||
given-names: "Florian"
|
||||
orcid: "https://orcid.org/0000-0002-8194-6521"
|
||||
- family-names: "Benini"
|
||||
given-names: "Luca"
|
||||
orcid: "https://orcid.org/0000-0001-8068-3806"
|
||||
doi: "10.1109/TVLSI.2020.3044752"
|
||||
journal: "IEEE Transactions on Very Large Scale Integration (VLSI) Systems"
|
||||
month: 12
|
||||
start: 774
|
||||
end: 787
|
||||
title: "FPnew: An Open-Source Multiformat Floating-Point Unit Architecture for Energy-Proportional Transprecision Computing"
|
||||
issue: 4
|
||||
volume: 29
|
||||
year: 2020
|
176
vendor/openhwgroup/cvfpu/LICENSE
vendored
176
vendor/openhwgroup/cvfpu/LICENSE
vendored
|
@ -1,176 +0,0 @@
|
|||
SOLDERPAD HARDWARE LICENSE version 0.51
|
||||
|
||||
This license is based closely on the Apache License Version 2.0, but is not
|
||||
approved or endorsed by the Apache Foundation. A copy of the non-modified
|
||||
Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
|
||||
As this license is not currently OSI or FSF approved, the Licensor permits any
|
||||
Work licensed under this License, at the option of the Licensee, to be treated
|
||||
as licensed under the Apache License Version 2.0 (which is so approved).
|
||||
|
||||
This License is licensed under the terms of this License and in particular
|
||||
clause 7 below (Disclaimer of Warranties) applies in relation to its use.
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
“License” shall mean the terms and conditions for use, reproduction, and
|
||||
distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
“Licensor” shall mean the Rights owner or entity authorized by the Rights owner
|
||||
that is granting the License.
|
||||
|
||||
“Legal Entity” shall mean the union of the acting entity and all other entities
|
||||
that control, are controlled by, or are under common control with that entity.
|
||||
For the purposes of this definition, “control” means (i) the power, direct or
|
||||
indirect, to cause the direction or management of such entity, whether by
|
||||
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
“You” (or “Your”) shall mean an individual or Legal Entity exercising
|
||||
permissions granted by this License.
|
||||
|
||||
“Rights” means copyright and any similar right including design right (whether
|
||||
registered or unregistered), semiconductor topography (mask) rights and
|
||||
database rights (but excluding Patents and Trademarks).
|
||||
|
||||
“Source” form shall mean the preferred form for making modifications, including
|
||||
but not limited to source code, net lists, board layouts, CAD files,
|
||||
documentation source, and configuration files.
|
||||
|
||||
“Object” form shall mean any form resulting from mechanical transformation or
|
||||
translation of a Source form, including but not limited to compiled object
|
||||
code, generated documentation, the instantiation of a hardware design and
|
||||
conversions to other media types, including intermediate forms such as
|
||||
bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
|
||||
works).
|
||||
|
||||
“Work” shall mean the work of authorship, whether in Source form or other
|
||||
Object form, made available under the License, as indicated by a Rights notice
|
||||
that is included in or attached to the work (an example is provided in the
|
||||
Appendix below).
|
||||
|
||||
“Derivative Works” shall mean any work, whether in Source or Object form, that
|
||||
is based on (or derived from) the Work and for which the editorial revisions,
|
||||
annotations, elaborations, or other modifications represent, as a whole, an
|
||||
original work of authorship. For the purposes of this License, Derivative Works
|
||||
shall not include works that remain separable from, or merely link (or bind by
|
||||
name) or physically connect to or interoperate with the interfaces of, the Work
|
||||
and Derivative Works thereof.
|
||||
|
||||
“Contribution” shall mean any design or work of authorship, including the
|
||||
original version of the Work and any modifications or additions to that Work or
|
||||
Derivative Works thereof, that is intentionally submitted to Licensor for
|
||||
inclusion in the Work by the Rights owner or by an individual or Legal Entity
|
||||
authorized to submit on behalf of the Rights owner. For the purposes of this
|
||||
definition, “submitted” means any form of electronic, verbal, or written
|
||||
communication sent to the Licensor or its representatives, including but not
|
||||
limited to communication on electronic mailing lists, source code control
|
||||
systems, and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but excluding
|
||||
communication that is conspicuously marked or otherwise designated in writing
|
||||
by the Rights owner as “Not a Contribution.”
|
||||
|
||||
“Contributor” shall mean Licensor and any individual or Legal Entity on behalf
|
||||
of whom a Contribution has been received by Licensor and subsequently
|
||||
incorporated within the Work.
|
||||
|
||||
2. Grant of License. Subject to the terms and conditions of this License, each
|
||||
Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable license under the Rights to reproduce,
|
||||
prepare Derivative Works of, publicly display, publicly perform, sublicense,
|
||||
and distribute the Work and such Derivative Works in Source or Object form and
|
||||
do anything in relation to the Work as if the Rights did not exist.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of this
|
||||
License, each Contributor hereby grants to You a perpetual, worldwide,
|
||||
non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
|
||||
section) patent license to make, have made, use, offer to sell, sell, import,
|
||||
and otherwise transfer the Work, where such license applies only to those
|
||||
patent claims licensable by such Contributor that are necessarily infringed by
|
||||
their Contribution(s) alone or by combination of their Contribution(s) with the
|
||||
Work to which such Contribution(s) was submitted. If You institute patent
|
||||
litigation against any entity (including a cross-claim or counterclaim in a
|
||||
lawsuit) alleging that the Work or a Contribution incorporated within the Work
|
||||
constitutes direct or contributory patent infringement, then any patent
|
||||
licenses granted to You under this License for that Work shall terminate as of
|
||||
the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the Work or
|
||||
Derivative Works thereof in any medium, with or without modifications, and in
|
||||
Source or Object form, provided that You meet the following conditions:
|
||||
|
||||
You must give any other recipients of the Work or Derivative Works a copy
|
||||
of this License; and
|
||||
|
||||
You must cause any modified files to carry prominent notices stating that
|
||||
You changed the files; and
|
||||
|
||||
You must retain, in the Source form of any Derivative Works that You
|
||||
distribute, all copyright, patent, trademark, and attribution notices from
|
||||
the Source form of the Work, excluding those notices that do not pertain to
|
||||
any part of the Derivative Works; and
|
||||
|
||||
If the Work includes a “NOTICE” text file as part of its distribution, then
|
||||
any Derivative Works that You distribute must include a readable copy of
|
||||
the attribution notices contained within such NOTICE file, excluding those
|
||||
notices that do not pertain to any part of the Derivative Works, in at
|
||||
least one of the following places: within a NOTICE text file distributed as
|
||||
part of the Derivative Works; within the Source form or documentation, if
|
||||
provided along with the Derivative Works; or, within a display generated by
|
||||
the Derivative Works, if and wherever such third-party notices normally
|
||||
appear. The contents of the NOTICE file are for informational purposes only
|
||||
and do not modify the License. You may add Your own attribution notices
|
||||
within Derivative Works that You distribute, alongside or as an addendum to
|
||||
the NOTICE text from the Work, provided that such additional attribution
|
||||
notices cannot be construed as modifying the License. You may add Your own
|
||||
copyright statement to Your modifications and may provide additional or
|
||||
different license terms and conditions for use, reproduction, or
|
||||
distribution of Your modifications, or for any such Derivative Works as a
|
||||
whole, provided Your use, reproduction, and distribution of the Work
|
||||
otherwise complies with the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, any
|
||||
Contribution intentionally submitted for inclusion in the Work by You to the
|
||||
Licensor shall be under the terms and conditions of this License, without any
|
||||
additional terms or conditions. Notwithstanding the above, nothing herein shall
|
||||
supersede or modify the terms of any separate license agreement you may have
|
||||
executed with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade names,
|
||||
trademarks, service marks, or product names of the Licensor, except as required
|
||||
for reasonable and customary use in describing the origin of the Work and
|
||||
reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
|
||||
writing, Licensor provides the Work (and each Contributor provides its
|
||||
Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied, including, without limitation, any warranties
|
||||
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any risks
|
||||
associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, whether in
|
||||
tort (including negligence), contract, or otherwise, unless required by
|
||||
applicable law (such as deliberate and grossly negligent acts) or agreed to in
|
||||
writing, shall any Contributor be liable to You for damages, including any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character arising as a result of this License or out of the use or inability to
|
||||
use the Work (including but not limited to damages for loss of goodwill, work
|
||||
stoppage, computer failure or malfunction, or any and all other commercial
|
||||
damages or losses), even if such Contributor has been advised of the
|
||||
possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing the Work or
|
||||
Derivative Works thereof, You may choose to offer, and charge a fee for,
|
||||
acceptance of support, warranty, indemnity, or other liability obligations
|
||||
and/or rights consistent with this License. However, in accepting such
|
||||
obligations, You may act only on Your own behalf and on Your sole
|
||||
responsibility, not on behalf of any other Contributor, and only if You agree
|
||||
to indemnify, defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason of your
|
||||
accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
174
vendor/openhwgroup/cvfpu/README.md
vendored
174
vendor/openhwgroup/cvfpu/README.md
vendored
|
@ -1,174 +0,0 @@
|
|||
# FPnew - New Floating-Point Unit with Transprecision Capabilities
|
||||
|
||||
Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog.
|
||||
|
||||
Maintainer: Luca Bertaccini <lbertaccini@iis.ee.ethz.ch>
|
||||
Principal Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
## Features
|
||||
|
||||
The FPU is a parametric design that allows generating FP hardware units for various use cases.
|
||||
Even though mainly designed for use in RISC-V processors, the FPU or its sub-blocks can easily be utilized in other environments.
|
||||
Our design aims to be compliant with IEEE 754-2008 and provides the following features:
|
||||
|
||||
### Formats
|
||||
Any IEEE 754-2008 style binary floating-point format can be supported, including single-, double-, quad- and half-precision (`binary32`, `binary64`, `binary128`, `binary16`).
|
||||
Formats can be defined with an arbitrary number of exponent and mantissa bits through parameters and are always symmetrically biased.
|
||||
Multiple FP formats can be supported concurrently, and the number of formats supported is not limited.
|
||||
|
||||
Multiple integer formats with an arbitrary number of bits (as source or destination of conversions) can also be defined.
|
||||
|
||||
### Operations
|
||||
- Addition/Subtraction
|
||||
- Multiplication
|
||||
- Fused multiply-add in four flavours (`fmadd`, `fmsub`, `fnmadd`, `fnmsub`)
|
||||
- Division<sup>1</sup>
|
||||
- Square root<sup>1</sup>
|
||||
- Minimum/Maximum<sup>2</sup>
|
||||
- Comparisons
|
||||
- Sign-Injections (`copy`, `abs`, `negate`, `copySign` etc.)
|
||||
- Conversions among all supported FP formats
|
||||
- Conversions between FP formats and integers (signed & unsigned) and vice versa
|
||||
- Classification
|
||||
|
||||
Multi-format FMA operations (i.e. multiplication in one format, accumulation in another) are optionally supported.
|
||||
|
||||
Optionally, *packed-SIMD* versions of all the above operations can be generated for formats narrower than the FPU datapath width.
|
||||
E.g.: Support for double-precision (64bit) operations and two simultaneous single-precision (32bit) operations.
|
||||
|
||||
It is also possible to generate only a subset of operations if e.g. divisions are not needed.
|
||||
|
||||
<sup>1</sup>Some compliance issues with IEEE 754-2008 are currently known to exist<br>
|
||||
<sup>2</sup>Implementing IEEE 754-201x `minimumNumber` and `maximumNumber`, respectively
|
||||
|
||||
### Rounding modes
|
||||
All IEEE 754-2008 rounding modes are supported, namely
|
||||
- `roundTiesToEven`
|
||||
- `roundTiesToAway`
|
||||
- `roundTowardPositive`
|
||||
- `roundTowardNegative`
|
||||
- `roundTowardZero`
|
||||
|
||||
### Status Flags
|
||||
All IEEE 754-2008 status flags are supported, namely
|
||||
- Invalid operation (`NV`)
|
||||
- Division by zero (`DZ`)
|
||||
- Overflow (`OF`)
|
||||
- Underflow (`UF`)
|
||||
- Inexact (`NX`)
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Dependencies
|
||||
|
||||
FPnew currently depends on the following:
|
||||
- `lzc` and `rr_arb_tree` from the `common_cells` repository (https://github.com/pulp-platform/common_cells.git)
|
||||
- optional: Divider and square-root unit from the `fpu-div-sqrt-mvp` repository (https://github.com/pulp-platform/fpu_div_sqrt_mvp.git)
|
||||
|
||||
These two repositories are included in the source code directory as git submodules; use
|
||||
```bash
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
if you want to load these dependencies there.
|
||||
|
||||
Consider using [Bender](https://github.com/fabianschuiki/bender.git) for managing dependencies in your projects. FPnew comes with Bender support!
|
||||
|
||||
### Usage
|
||||
|
||||
The top-level module of the FPU is called `fpnew_top` and can be directly instantiated in your design.
|
||||
Make sure you compile the package `fpnew_pkg` ahead of any files making references to types, parameters or functions defined there.
|
||||
|
||||
It is discouraged to `import` all of `fpnew_pkg` into your source files. Instead, explicitly scope references into the package like so: `fpnew_pkg::foo`.
|
||||
|
||||
#### Example Instantiation
|
||||
|
||||
```SystemVerilog
|
||||
// FPU instance
|
||||
fpnew_top #(
|
||||
.Features ( fpnew_pkg::RV64D ),
|
||||
.Implementation ( fpnew_pkg::DEFAULT_NOREGS ),
|
||||
.TagType ( logic )
|
||||
) i_fpnew_top (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.operands_i,
|
||||
.rnd_mode_i,
|
||||
.op_i,
|
||||
.op_mod_i,
|
||||
.src_fmt_i,
|
||||
.dst_fmt_i,
|
||||
.int_fmt_i,
|
||||
.vectorial_op_i,
|
||||
.tag_i,
|
||||
.in_valid_i,
|
||||
.in_ready_o,
|
||||
.flush_i,
|
||||
.result_o,
|
||||
.status_o,
|
||||
.tag_o,
|
||||
.out_valid_o,
|
||||
.out_ready_i,
|
||||
.busy_o
|
||||
);
|
||||
```
|
||||
|
||||
### Documentation
|
||||
|
||||
More in-depth documentation on the FPnew configuration, interfaces and architecture is provided in [`docs/README.md`](docs/README.md).
|
||||
|
||||
### Issues and Contributing
|
||||
|
||||
In case you find any issues with FPnew that have not been reported yet, don't hesitate to open a new [issue](https://github.com/pulp-platform/fpnew/issues) here on GitHub.
|
||||
Please, don't use the issue tracker for support questions.
|
||||
Instead, consider contacting the maintainers or consulting the [PULP forums](https://pulp-platform.org/community/index.php).
|
||||
|
||||
In case you would like to contribute to the project, please refer to the contributing guidelines in [`docs/CONTRIBUTING.md`](docs/CONTRIBUTING.md) before opening a pull request.
|
||||
|
||||
|
||||
### Repository Structure
|
||||
|
||||
HDL source code can be found in the `src` directory while documentation is located in `docs`.
|
||||
A changelog is kept at [`docs/CHANGELOG.md`](docs/CHANGELOG.md).
|
||||
|
||||
This repository loosely follows the [GitFlow](https://nvie.com/posts/a-successful-git-branching-model/) branching model.
|
||||
This means that the `master` branch is considered stable and used to publish releases of the FPU while the `develop` branch contains features and bugfixes that have not yet been properly released.
|
||||
|
||||
Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), as outlined in the [changelog](docs/CHANGELOG.md).
|
||||
|
||||
## Licensing
|
||||
|
||||
FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [license file](LICENSE) for further information.
|
||||
|
||||
|
||||
## Publication
|
||||
|
||||
If you use FPnew in your work, you can cite us:
|
||||
|
||||
<details>
|
||||
<summary>FPnew Publication</summary>
|
||||
<p>
|
||||
|
||||
```
|
||||
@article{mach2020fpnew,
|
||||
title={Fpnew: An open-source multiformat floating-point unit architecture for energy-proportional transprecision computing},
|
||||
author={Mach, Stefan and Schuiki, Fabian and Zaruba, Florian and Benini, Luca},
|
||||
journal={IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
|
||||
volume={29},
|
||||
number={4},
|
||||
pages={774--787},
|
||||
year={2020},
|
||||
publisher={IEEE}
|
||||
}
|
||||
```
|
||||
|
||||
</p>
|
||||
</details>
|
||||
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631.
|
||||
|
||||
For further information, visit [oprecomp.eu](http://oprecomp.eu).
|
||||
|
||||

|
794
vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
vendored
794
vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv
vendored
|
@ -1,794 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
module fpnew_cast_multi #(
|
||||
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
|
||||
parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
|
||||
// FPU configuration
|
||||
parameter int unsigned NumPipeRegs = 0,
|
||||
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
|
||||
parameter type TagType = logic,
|
||||
parameter type AuxType = logic,
|
||||
// Do not change
|
||||
localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
|
||||
fpnew_pkg::max_int_width(IntFmtConfig)),
|
||||
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// Input signals
|
||||
input logic [WIDTH-1:0] operands_i, // 1 operand
|
||||
input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand
|
||||
input fpnew_pkg::roundmode_e rnd_mode_i,
|
||||
input fpnew_pkg::operation_e op_i,
|
||||
input logic op_mod_i,
|
||||
input fpnew_pkg::fp_format_e src_fmt_i,
|
||||
input fpnew_pkg::fp_format_e dst_fmt_i,
|
||||
input fpnew_pkg::int_format_e int_fmt_i,
|
||||
input TagType tag_i,
|
||||
input logic mask_i,
|
||||
input AuxType aux_i,
|
||||
// Input Handshake
|
||||
input logic in_valid_i,
|
||||
output logic in_ready_o,
|
||||
input logic flush_i,
|
||||
// Output signals
|
||||
output logic [WIDTH-1:0] result_o,
|
||||
output fpnew_pkg::status_t status_o,
|
||||
output logic extension_bit_o,
|
||||
output TagType tag_o,
|
||||
output logic mask_o,
|
||||
output AuxType aux_o,
|
||||
// Output handshake
|
||||
output logic out_valid_o,
|
||||
input logic out_ready_i,
|
||||
// Indication of valid data in flight
|
||||
output logic busy_o
|
||||
);
|
||||
|
||||
// ----------
|
||||
// Constants
|
||||
// ----------
|
||||
localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
|
||||
localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig);
|
||||
|
||||
localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
|
||||
|
||||
localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
|
||||
localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
|
||||
localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1;
|
||||
|
||||
// The internal mantissa includes normal bit or an entire integer
|
||||
localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH);
|
||||
// If needed, there will be a LZC for renormalization
|
||||
localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH);
|
||||
// The internal exponent must be able to represent the smallest denormal input value as signed
|
||||
// or the number of bits in an integer
|
||||
localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH),
|
||||
fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1;
|
||||
// Pipelines
|
||||
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? (NumPipeRegs / 3) // Last to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
|
||||
// ---------------
|
||||
// Input pipeline
|
||||
// ---------------
|
||||
// Selected pipeline output signals as non-arrays
|
||||
logic [WIDTH-1:0] operands_q;
|
||||
logic [NUM_FORMATS-1:0] is_boxed_q;
|
||||
logic op_mod_q;
|
||||
fpnew_pkg::fp_format_e src_fmt_q;
|
||||
fpnew_pkg::fp_format_e dst_fmt_q;
|
||||
fpnew_pkg::int_format_e int_fmt_q;
|
||||
|
||||
// Input pipeline signals, index i holds signal after i register stages
|
||||
logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q;
|
||||
logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q;
|
||||
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
|
||||
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
|
||||
fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q;
|
||||
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
|
||||
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_INP_REGS] inp_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from inputs
|
||||
assign inp_pipe_operands_q[0] = operands_i;
|
||||
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
|
||||
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
|
||||
assign inp_pipe_op_q[0] = op_i;
|
||||
assign inp_pipe_op_mod_q[0] = op_mod_i;
|
||||
assign inp_pipe_src_fmt_q[0] = src_fmt_i;
|
||||
assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
|
||||
assign inp_pipe_int_fmt_q[0] = int_fmt_i;
|
||||
assign inp_pipe_tag_q[0] = tag_i;
|
||||
assign inp_pipe_mask_q[0] = mask_i;
|
||||
assign inp_pipe_aux_q[0] = aux_i;
|
||||
assign inp_pipe_valid_q[0] = in_valid_i;
|
||||
// Input stage: Propagate pipeline ready signal to updtream circuitry
|
||||
assign in_ready_o = inp_pipe_ready[0];
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
|
||||
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
|
||||
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
|
||||
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
// Output stage: assign selected pipe outputs to signals for later use
|
||||
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
|
||||
assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS];
|
||||
assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS];
|
||||
assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS];
|
||||
assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS];
|
||||
assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS];
|
||||
|
||||
// -----------------
|
||||
// Input processing
|
||||
// -----------------
|
||||
logic src_is_int, dst_is_int; // if 0, it's a float
|
||||
|
||||
assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F);
|
||||
assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I);
|
||||
|
||||
logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||
|
||||
logic [NUM_FORMATS-1:0] fmt_sign;
|
||||
logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||
logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC
|
||||
|
||||
fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info;
|
||||
|
||||
logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val;
|
||||
logic int_sign;
|
||||
logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa;
|
||||
|
||||
// FP Input initialization
|
||||
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
|
||||
// Set up some constants
|
||||
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
|
||||
if (FpFmtConfig[fmt]) begin : active_format
|
||||
// Classify input
|
||||
fpnew_classifier #(
|
||||
.FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
|
||||
.NumOperands ( 1 )
|
||||
) i_fpnew_classifier (
|
||||
.operands_i ( operands_q[FP_WIDTH-1:0] ),
|
||||
.is_boxed_i ( is_boxed_q[fmt] ),
|
||||
.info_o ( info[fmt] )
|
||||
);
|
||||
|
||||
assign fmt_sign[fmt] = operands_q[FP_WIDTH-1];
|
||||
assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]});
|
||||
assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad
|
||||
// Compensation for the difference in mantissa widths used for leading-zero count
|
||||
assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS);
|
||||
end else begin : inactive_format
|
||||
assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
|
||||
assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
|
||||
assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
|
||||
assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
|
||||
assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
|
||||
end
|
||||
end
|
||||
|
||||
// Sign-extend INT input
|
||||
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int
|
||||
// Set up some constants
|
||||
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
|
||||
|
||||
if (IntFmtConfig[ifmt]) begin : active_format // only active formats
|
||||
always_comb begin : sign_ext_input
|
||||
// sign-extend value only if it's signed
|
||||
ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q};
|
||||
ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0];
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
|
||||
end
|
||||
end
|
||||
|
||||
// Construct input mantissa from integer
|
||||
assign int_value = ifmt_input_val[int_fmt_q];
|
||||
assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative
|
||||
assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative
|
||||
|
||||
// select mantissa with source format
|
||||
assign encoded_mant = src_is_int ? int_mantissa : fmt_mantissa[src_fmt_q];
|
||||
|
||||
// --------------
|
||||
// Normalization
|
||||
// --------------
|
||||
logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias
|
||||
logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased)
|
||||
logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal
|
||||
logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa
|
||||
|
||||
assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q));
|
||||
assign src_exp = fmt_exponent[src_fmt_q];
|
||||
assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal});
|
||||
assign src_offset = fmt_shift_compensation[src_fmt_q];
|
||||
|
||||
logic input_sign; // input sign
|
||||
logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
logic mant_is_zero; // for integer zeroes
|
||||
|
||||
logic signed [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
logic signed [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Input mantissa needs to be normalized
|
||||
logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount
|
||||
logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations
|
||||
|
||||
// Leading-zero counter is needed for renormalization
|
||||
lzc #(
|
||||
.WIDTH ( INT_MAN_WIDTH ),
|
||||
.MODE ( 1 ) // MODE = 1 counts leading zeroes
|
||||
) i_lzc (
|
||||
.in_i ( encoded_mant ),
|
||||
.cnt_o ( renorm_shamt ),
|
||||
.empty_o ( mant_is_zero )
|
||||
);
|
||||
assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt});
|
||||
|
||||
// Get the sign from the proper source
|
||||
assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q];
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant = encoded_mant << renorm_shamt;
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias -
|
||||
renorm_shamt_sgn + src_offset); // compensate for shift
|
||||
assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
|
||||
|
||||
assign input_exp = src_is_int ? int_input_exp : fp_input_exp;
|
||||
|
||||
logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q));
|
||||
|
||||
// ---------------
|
||||
// Internal pipeline
|
||||
// ---------------
|
||||
// Pipeline output signals as non-arrays
|
||||
logic input_sign_q;
|
||||
logic signed [INT_EXP_WIDTH-1:0] input_exp_q;
|
||||
logic [INT_MAN_WIDTH-1:0] input_mant_q;
|
||||
logic signed [INT_EXP_WIDTH-1:0] destination_exp_q;
|
||||
logic src_is_int_q;
|
||||
logic dst_is_int_q;
|
||||
fpnew_pkg::fp_info_t info_q;
|
||||
logic mant_is_zero_q;
|
||||
logic op_mod_q2;
|
||||
fpnew_pkg::roundmode_e rnd_mode_q;
|
||||
fpnew_pkg::fp_format_e src_fmt_q2;
|
||||
fpnew_pkg::fp_format_e dst_fmt_q2;
|
||||
fpnew_pkg::int_format_e int_fmt_q2;
|
||||
// Internal pipeline signals, index i holds signal after i register stages
|
||||
|
||||
|
||||
logic [0:NUM_MID_REGS] mid_pipe_input_sign_q;
|
||||
logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q;
|
||||
logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q;
|
||||
logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q;
|
||||
fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_op_mod_q;
|
||||
fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
|
||||
fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q;
|
||||
TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_mask_q;
|
||||
AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
|
||||
logic [0:NUM_MID_REGS] mid_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_MID_REGS] mid_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from upstream logic
|
||||
assign mid_pipe_input_sign_q[0] = input_sign;
|
||||
assign mid_pipe_input_exp_q[0] = input_exp;
|
||||
assign mid_pipe_input_mant_q[0] = input_mant;
|
||||
assign mid_pipe_dest_exp_q[0] = destination_exp;
|
||||
assign mid_pipe_src_is_int_q[0] = src_is_int;
|
||||
assign mid_pipe_dst_is_int_q[0] = dst_is_int;
|
||||
assign mid_pipe_info_q[0] = info[src_fmt_q];
|
||||
assign mid_pipe_mant_zero_q[0] = mant_is_zero;
|
||||
assign mid_pipe_op_mod_q[0] = op_mod_q;
|
||||
assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
|
||||
assign mid_pipe_src_fmt_q[0] = src_fmt_q;
|
||||
assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
|
||||
assign mid_pipe_int_fmt_q[0] = int_fmt_q;
|
||||
assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
|
||||
assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
|
||||
assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
|
||||
assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
|
||||
// Input stage: Propagate pipeline ready signal to input pipe
|
||||
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
|
||||
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
|
||||
`FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0))
|
||||
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
// Output stage: assign selected pipe outputs to signals for later use
|
||||
assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS];
|
||||
assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS];
|
||||
assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS];
|
||||
assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS];
|
||||
assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS];
|
||||
assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS];
|
||||
assign info_q = mid_pipe_info_q[NUM_MID_REGS];
|
||||
assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS];
|
||||
assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS];
|
||||
assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
|
||||
assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS];
|
||||
assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
|
||||
assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS];
|
||||
|
||||
// --------
|
||||
// Casting
|
||||
// --------
|
||||
logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
|
||||
logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
|
||||
logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
|
||||
logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization
|
||||
|
||||
logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
|
||||
logic of_before_round, uf_before_round;
|
||||
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
always_comb begin : cast_value
|
||||
// Default assignment
|
||||
final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits
|
||||
preshift_mant = '0; // initialize mantissa container with zeroes
|
||||
denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
uf_before_round = 1'b0;
|
||||
|
||||
// Place mantissa to the left of the shifter
|
||||
preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1);
|
||||
|
||||
// Handle INT casts
|
||||
if (dst_is_int_q) begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q);
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
if (input_exp_q >= signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin
|
||||
denorm_shamt = '0; // prevent shifting
|
||||
of_before_round = 1'b1;
|
||||
// underflow
|
||||
end else if (input_exp_q < -1) begin
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
uf_before_round = 1'b1;
|
||||
end
|
||||
// Handle FP over-/underflows
|
||||
end else begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) ||
|
||||
(~src_is_int_q && info_q.is_inf)) begin
|
||||
final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value
|
||||
preshift_mant = '1; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
// Denormalize underflowing values
|
||||
end else if (destination_exp_q < 1 &&
|
||||
destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
|
||||
final_exp = '0; // denormal result
|
||||
denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting
|
||||
uf_before_round = 1'b1;
|
||||
// Limit the shift to retain sticky bits
|
||||
end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin
|
||||
final_exp = '0; // denormal result
|
||||
denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky
|
||||
uf_before_round = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. and R
|
||||
localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R
|
||||
|
||||
// Mantissa adjustment shift
|
||||
assign destination_mant = preshift_mant >> denorm_shamt;
|
||||
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
||||
assign {final_mant, fp_round_sticky_bits[1]} =
|
||||
destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1];
|
||||
assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1];
|
||||
// Collapse sticky bits
|
||||
assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]});
|
||||
assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]});
|
||||
|
||||
// select RS bits for destination operation
|
||||
assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits;
|
||||
|
||||
// ----------------------------
|
||||
// Rounding and classification
|
||||
// ----------------------------
|
||||
logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd
|
||||
logic of_after_round; // overflow
|
||||
logic uf_after_round; // underflow
|
||||
|
||||
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format
|
||||
logic [NUM_FORMATS-1:0] fmt_of_after_round;
|
||||
logic [NUM_FORMATS-1:0] fmt_uf_after_round;
|
||||
|
||||
logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
|
||||
logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round;
|
||||
|
||||
logic rounded_sign;
|
||||
logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
|
||||
logic result_true_zero;
|
||||
|
||||
logic [WIDTH-1:0] rounded_int_res; // after possible inversion
|
||||
logic rounded_int_res_zero; // after rounding
|
||||
|
||||
|
||||
// Pack exponent and mantissa into proper rounding form
|
||||
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
|
||||
// Set up some constants
|
||||
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
|
||||
if (FpFmtConfig[fmt]) begin : active_format
|
||||
always_comb begin : assemble_result
|
||||
fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
end
|
||||
|
||||
// Sign-extend integer result
|
||||
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext
|
||||
// Set up some constants
|
||||
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
|
||||
|
||||
if (IntFmtConfig[ifmt]) begin : active_format
|
||||
always_comb begin : assemble_result
|
||||
// sign-extend result
|
||||
ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]};
|
||||
ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0];
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
end
|
||||
|
||||
// Select output with destination format and operation
|
||||
assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2];
|
||||
|
||||
fpnew_rounding #(
|
||||
.AbsWidth ( WIDTH )
|
||||
) i_fpnew_rounding (
|
||||
.abs_value_i ( pre_round_abs ),
|
||||
.sign_i ( input_sign_q ), // source format
|
||||
.round_sticky_bits_i ( round_sticky_bits ),
|
||||
.rnd_mode_i ( rnd_mode_q ),
|
||||
.effective_subtraction_i ( 1'b0 ), // no operation happened
|
||||
.abs_rounded_o ( rounded_abs ),
|
||||
.sign_o ( rounded_sign ),
|
||||
.exact_zero_o ( result_true_zero )
|
||||
);
|
||||
|
||||
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
|
||||
|
||||
// Detect overflows and inject sign
|
||||
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
|
||||
// Set up some constants
|
||||
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
|
||||
if (FpFmtConfig[fmt]) begin : active_format
|
||||
always_comb begin : post_process
|
||||
// detect of / uf
|
||||
fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
|
||||
fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
|
||||
|
||||
// Assemble regular result, NaN-box short ones. Int zeroes need to be detected
|
||||
fmt_result[fmt] = '1;
|
||||
fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q
|
||||
? '0
|
||||
: {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
|
||||
assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
|
||||
assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
end
|
||||
|
||||
// Negative integer result needs to be brought into two's complement
|
||||
assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
|
||||
assign rounded_int_res_zero = (rounded_int_res == '0);
|
||||
|
||||
// Detect integer overflows after rounding (only positives)
|
||||
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow
|
||||
// Set up some constants
|
||||
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
|
||||
|
||||
if (IntFmtConfig[ifmt]) begin : active_format
|
||||
always_comb begin : detect_overflow
|
||||
ifmt_of_after_round[ifmt] = 1'b0;
|
||||
// Int result can overflow if we're at the max exponent
|
||||
if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin
|
||||
// Check whether the rounded MSB differs from unrounded MSB
|
||||
ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2];
|
||||
end
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE;
|
||||
end
|
||||
end
|
||||
|
||||
// Classification after rounding, selected by destination format
|
||||
assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
|
||||
assign of_after_round = dst_is_int_q ? ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2];
|
||||
|
||||
// -------------------------
|
||||
// FP Special case handling
|
||||
// -------------------------
|
||||
logic [WIDTH-1:0] fp_special_result;
|
||||
fpnew_pkg::status_t fp_special_status;
|
||||
logic fp_result_is_special;
|
||||
|
||||
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
|
||||
|
||||
// Special result construction
|
||||
for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
|
||||
// Set up some constants
|
||||
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
|
||||
|
||||
localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
|
||||
localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
|
||||
|
||||
if (FpFmtConfig[fmt]) begin : active_format
|
||||
always_comb begin : special_results
|
||||
logic [FP_WIDTH-1:0] special_res;
|
||||
special_res = info_q.is_zero
|
||||
? input_sign_q << FP_WIDTH-1 // signed zero
|
||||
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
|
||||
|
||||
// Initialize special result with ones (NaN-box)
|
||||
fmt_special_result[fmt] = '1;
|
||||
fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format, I2F casts don't produce a special result
|
||||
assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero |
|
||||
info_q.is_nan |
|
||||
~info_q.is_boxed);
|
||||
|
||||
// Signalling input NaNs raise invalid flag, otherwise no flags set
|
||||
assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0};
|
||||
|
||||
// Assemble result according to destination format
|
||||
assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format
|
||||
|
||||
// --------------------------
|
||||
// INT Special case handling
|
||||
// --------------------------
|
||||
logic [WIDTH-1:0] int_special_result;
|
||||
fpnew_pkg::status_t int_special_status;
|
||||
logic int_result_is_special;
|
||||
|
||||
logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result;
|
||||
|
||||
// Special result construction
|
||||
for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int
|
||||
// Set up some constants
|
||||
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
|
||||
|
||||
if (IntFmtConfig[ifmt]) begin : active_format
|
||||
always_comb begin : special_results
|
||||
automatic logic [INT_WIDTH-1:0] special_res;
|
||||
|
||||
// Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1
|
||||
special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1
|
||||
special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1
|
||||
|
||||
// Negative special cases (except for NaNs) are tied to -max or 0
|
||||
if (input_sign_q && !info_q.is_nan)
|
||||
special_res = ~special_res;
|
||||
|
||||
// Initialize special result with sign-extension
|
||||
ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]};
|
||||
ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res;
|
||||
end
|
||||
end else begin : inactive_format
|
||||
assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special = info_q.is_nan | info_q.is_inf |
|
||||
of_before_round | of_after_round | ~info_q.is_boxed |
|
||||
(input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
|
||||
|
||||
// All integer special cases are invalid
|
||||
assign int_special_status = '{NV: 1'b1, default: 1'b0};
|
||||
|
||||
// Assemble result according to destination format
|
||||
assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format
|
||||
|
||||
// -----------------
|
||||
// Result selection
|
||||
// -----------------
|
||||
fpnew_pkg::status_t int_regular_status, fp_regular_status;
|
||||
|
||||
logic [WIDTH-1:0] fp_result, int_result;
|
||||
fpnew_pkg::status_t fp_status, int_status;
|
||||
|
||||
assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX;
|
||||
assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f
|
||||
: (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round));
|
||||
assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0};
|
||||
|
||||
assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2];
|
||||
assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status;
|
||||
assign int_result = int_result_is_special ? int_special_result : rounded_int_res;
|
||||
assign int_status = int_result_is_special ? int_special_status : int_regular_status;
|
||||
|
||||
// Final results for output pipeline
|
||||
logic [WIDTH-1:0] result_d;
|
||||
fpnew_pkg::status_t status_d;
|
||||
logic extension_bit;
|
||||
|
||||
// Select output depending on special case detection
|
||||
assign result_d = dst_is_int_q ? int_result : fp_result;
|
||||
assign status_d = dst_is_int_q ? int_status : fp_status;
|
||||
|
||||
// MSB of int result decides extension, otherwise NaN box
|
||||
assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1;
|
||||
|
||||
// ----------------
|
||||
// Output Pipeline
|
||||
// ----------------
|
||||
// Output pipeline signals, index i holds signal after i register stages
|
||||
logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
|
||||
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
|
||||
logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q;
|
||||
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
|
||||
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
|
||||
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
|
||||
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_OUT_REGS] out_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from the selected result and the last mid-pipeline stage
|
||||
assign out_pipe_result_q[0] = result_d;
|
||||
assign out_pipe_status_q[0] = status_d;
|
||||
assign out_pipe_ext_bit_q[0] = extension_bit;
|
||||
assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
|
||||
assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
|
||||
assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
|
||||
assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
|
||||
// Input stage: Propagate pipeline ready signal to inside pipe
|
||||
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
|
||||
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
|
||||
// Output stage: assign module outputs
|
||||
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
|
||||
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
|
||||
assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
|
||||
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
|
||||
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
|
||||
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
|
||||
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
|
||||
assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
|
||||
endmodule
|
74
vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
vendored
74
vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv
vendored
|
@ -1,74 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
// Combinational classifier for floating-point operands.
//
// For each of the NumOperands inputs, the raw bits are interpreted as
// {sign, exponent, mantissa} of the selected FpFormat and a set of mutually
// consistent class flags is produced on info_o:
//   - is_normal / is_subnormal / is_zero / is_inf are only ever set for
//     operands flagged as boxed through is_boxed_i
//   - an operand that is NOT boxed is always reported as a (quiet) NaN
//   - a NaN is signalling when it is boxed and its mantissa MSB is clear
module fpnew_classifier #(
  parameter fpnew_pkg::fp_format_e FpFormat    = fpnew_pkg::fp_format_e'(0),
  parameter int unsigned           NumOperands = 1,
  // Do not change
  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat)
) (
  input  logic                [NumOperands-1:0][WIDTH-1:0] operands_i,
  input  logic                [NumOperands-1:0]            is_boxed_i,
  output fpnew_pkg::fp_info_t [NumOperands-1:0]            info_o
);

  // Field widths of the selected format
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);

  // Bit-level view of one operand
  typedef struct packed {
    logic                sign;
    logic [EXP_BITS-1:0] exponent;
    logic [MAN_BITS-1:0] mantissa;
  } fp_t;

  // One independent classifier per operand
  for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values

    // Operand under inspection and its boxing flag
    fp_t  value;
    logic is_boxed;

    // Raw field predicates shared by the class flags below
    logic exp_all_zeros;
    logic exp_all_ones;
    logic man_all_zeros;

    // Resulting class flags
    logic is_zero;
    logic is_subnormal;
    logic is_normal;
    logic is_inf;
    logic is_nan;
    logic is_signalling;
    logic is_quiet;

    // ---------------
    // Classify Input
    // ---------------
    always_comb begin : classify_input
      value    = operands_i[op];
      is_boxed = is_boxed_i[op];

      // Decode the exponent and mantissa fields once
      exp_all_zeros = (value.exponent == '0);
      exp_all_ones  = (value.exponent == '1);
      man_all_zeros = (value.mantissa == '0);

      // Numeric classes require a properly boxed operand
      is_zero      = is_boxed && exp_all_zeros && man_all_zeros;
      is_subnormal = is_boxed && exp_all_zeros && !is_zero;
      is_normal    = is_boxed && !exp_all_zeros && !exp_all_ones;
      is_inf       = is_boxed && (exp_all_ones && man_all_zeros);

      // Anything not boxed is treated as NaN, as is the usual NaN encoding
      is_nan        = !is_boxed || (exp_all_ones && !man_all_zeros);
      // Signalling NaNs have the mantissa MSB cleared (boxed operands only)
      is_signalling = is_boxed && is_nan && !value.mantissa[MAN_BITS-1];
      is_quiet      = is_nan && !is_signalling;

      // Drive the info record of this operand
      info_o[op].is_boxed      = is_boxed;
      info_o[op].is_zero       = is_zero;
      info_o[op].is_subnormal  = is_subnormal;
      info_o[op].is_normal     = is_normal;
      info_o[op].is_inf        = is_inf;
      info_o[op].is_nan        = is_nan;
      info_o[op].is_signalling = is_signalling;
      info_o[op].is_quiet      = is_quiet;
    end
  end
endmodule
|
366
vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
vendored
366
vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv
vendored
|
@ -1,366 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
// Multi-format division / square-root lane.
//
// Wraps one iterative `div_sqrt_top_mvp` unit with:
//   - an optional input register pipeline  (NUM_INP_REGS stages)
//   - a small FSM (IDLE / BUSY / HOLD) that sequences the handshake with the
//     iterative unit and with the downstream pipeline
//   - a hold register that keeps a finished result until all SIMD lanes are
//     done (simd_synch_done_i) and downstream can take it
//   - an optional output register pipeline (NUM_OUT_REGS stages)
//
// Operation select: an op equal to fpnew_pkg::DIV starts a division, every
// other op value starts a square root.
// FP8 handling: when FP8 is enabled in FpFmtConfig and selected by dst_fmt_i,
// operands are shifted left by 8 bits before entering the unit (which is run
// in its FP16 setting) and the result is shifted right by 8 bits afterwards.
//
// Lane synchronisation:
//   divsqrt_ready_o / simd_synch_rdy_i   - FSM ready out, synchronised ready in
//   divsqrt_done_o  / simd_synch_done_i  - done (now or earlier) out, all-done in
//
// flush_i clears all valid bits, kills the unit and returns the FSM to IDLE.
// extension_bit_o is tied to 1 (result is always NaN-boxed).
// NOTE: is_boxed_i is part of the port list but is not referenced in this module.
module fpnew_divsqrt_multi #(
  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig = '1,
  // FPU configuration
  parameter int unsigned             NumPipeRegs = 0,
  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::AFTER,
  parameter type                     TagType     = logic,
  parameter type                     AuxType     = logic,
  // Do not change
  localparam int unsigned WIDTH       = fpnew_pkg::max_fp_width(FpFmtConfig),
  localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
) (
  input  logic                        clk_i,
  input  logic                        rst_ni,
  // Input signals
  input  logic [1:0][WIDTH-1:0]       operands_i, // 2 operands
  input  logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands
  input  fpnew_pkg::roundmode_e       rnd_mode_i,
  input  fpnew_pkg::operation_e       op_i,
  input  fpnew_pkg::fp_format_e       dst_fmt_i,
  input  TagType                      tag_i,
  input  logic                        mask_i,
  input  AuxType                      aux_i,
  // Input Handshake
  input  logic                        in_valid_i,
  output logic                        in_ready_o,
  output logic                        divsqrt_done_o,
  input  logic                        simd_synch_done_i,
  output logic                        divsqrt_ready_o,
  input  logic                        simd_synch_rdy_i,
  input  logic                        flush_i,
  // Output signals
  output logic [WIDTH-1:0]            result_o,
  output fpnew_pkg::status_t          status_o,
  output logic                        extension_bit_o,
  output TagType                      tag_o,
  output logic                        mask_o,
  output AuxType                      aux_o,
  // Output handshake
  output logic                        out_valid_o,
  input  logic                        out_ready_i,
  // Indication of valid data in flight
  output logic                        busy_o
);

  // ----------
  // Constants
  // ----------
  // Pipelines: BEFORE puts all registers at the input, AFTER and INSIDE put
  // them at the output, DISTRIBUTED splits them with the output side getting
  // the extra register when NumPipeRegs is odd.
  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
                            ? NumPipeRegs
                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
                               ? (NumPipeRegs / 2) // Last to get distributed regs
                               : 0); // no regs here otherwise
  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
                            ? NumPipeRegs
                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
                               : 0); // no regs here otherwise

  // ---------------
  // Input pipeline
  // ---------------
  // Selected pipeline output signals as non-arrays
  logic [1:0][WIDTH-1:0] operands_q;
  fpnew_pkg::roundmode_e rnd_mode_q;
  fpnew_pkg::operation_e op_q;
  fpnew_pkg::fp_format_e dst_fmt_q;
  logic                  in_valid_q;

  // Input pipeline signals, index i holds signal after i register stages
  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
  fpnew_pkg::fp_format_e [0:NUM_INP_REGS]                 inp_pipe_dst_fmt_q;
  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
  logic                  [0:NUM_INP_REGS]                 inp_pipe_mask_q;
  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
  // Ready signal is combinatorial for all stages
  logic [0:NUM_INP_REGS] inp_pipe_ready;

  // Input stage: First element of pipeline is taken from inputs
  assign inp_pipe_operands_q[0] = operands_i;
  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
  assign inp_pipe_op_q[0]       = op_i;
  assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
  assign inp_pipe_tag_q[0]      = tag_i;
  assign inp_pipe_mask_q[0]     = mask_i;
  assign inp_pipe_aux_q[0]      = aux_i;
  assign inp_pipe_valid_q[0]    = in_valid_i;
  // Input stage: Propagate pipeline ready signal to upstream circuitry
  assign in_ready_o = inp_pipe_ready[0];
  // Generate the register stages
  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
    // Internal register enable for this stage
    logic reg_ena;
    // Determine the ready signal of the current stage - advance the pipeline:
    // 1. if the next stage is ready for our data
    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
    // Valid: enabled by ready signal, synchronous clear with the flush signal
    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
    // Enable register if pipeline ready and a valid data item is present
    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
    // Generate the pipeline registers within the stages, use enable-registers
    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
    `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
  end
  // Output stage: assign selected pipe outputs to signals for later use
  assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
  assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
  assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS];

  // -----------------
  // Input processing
  // -----------------
  logic [1:0]       divsqrt_fmt;
  logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit
  logic             input_is_fp8;

  // Translate fpnew formats into divsqrt formats
  always_comb begin : translate_fmt
    unique case (dst_fmt_q)
      fpnew_pkg::FP32:    divsqrt_fmt = 2'b00;
      fpnew_pkg::FP64:    divsqrt_fmt = 2'b01;
      fpnew_pkg::FP16:    divsqrt_fmt = 2'b10;
      fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11;
      default:            divsqrt_fmt = 2'b10; // maps also FP8 to FP16
    endcase

    // Only if FP8 is enabled
    input_is_fp8 = FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8);

    // If FP8 is supported, map it to an FP16 value
    divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0];
    divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1];
  end

  // ------------
  // Control FSM
  // ------------

  logic in_ready;                           // input handshake with upstream
  logic div_valid, sqrt_valid;              // input signalling with unit
  logic unit_ready, unit_done, unit_done_q; // status signals from unit instance
  logic op_starting;                        // high in the cycle a new operation starts
  logic out_valid, out_ready;               // output handshake with downstream
  logic unit_busy;                          // valid data in flight
  // FSM states
  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
  fsm_state_e state_q, state_d;

  // Ready synch with other lanes
  // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
  assign divsqrt_ready_o = in_ready;
  // Upstream ready comes from sanitization FSM, and it is synched among all the lanes
  assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i;

  // Valid synch with other lanes
  // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
  // As soon as all the lanes are over, we can clear this FF and start with a new operation
  // (load-enabled by unit_done, synchronously cleared by simd_synch_done_i)
  `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni)
  // Tell the other units that this unit has finished now or in the past
  assign divsqrt_done_o = unit_done_q | unit_done;

  // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
  assign div_valid   = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
  assign sqrt_valid  = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
  assign op_starting = div_valid | sqrt_valid;

  // FSM to safely apply and receive data from DIVSQRT unit
  always_comb begin : flag_fsm
    // Default assignments
    in_ready  = 1'b0;
    out_valid = 1'b0;
    unit_busy = 1'b0;
    state_d   = state_q;

    unique case (state_q)
      // Waiting for work
      IDLE: begin
        in_ready = 1'b1; // we're ready
        if (in_valid_q && unit_ready) begin // New work arrives
          state_d = BUSY; // go into processing state
        end
      end
      // Operation in progress
      BUSY: begin
        unit_busy = 1'b1; // data in flight
        // If all the lanes are done with processing
        if (simd_synch_done_i) begin
          out_valid = 1'b1; // try to commit result downstream
          // If downstream accepts our result
          if (out_ready) begin
            state_d = IDLE; // we anticipate going back to idling..
            if (in_valid_q && unit_ready) begin // ..unless new work comes in
              in_ready = 1'b1; // we acknowledge the instruction
              state_d  = BUSY; // and stay busy with it
            end
          // Otherwise if downstream is not ready for the result
          end else begin
            state_d = HOLD; // wait for the pipeline to take the data
          end
        end
      end
      // Waiting with valid result for downstream
      HOLD: begin
        unit_busy = 1'b1; // data in flight
        out_valid = 1'b1; // try to commit result downstream
        // If the result is accepted by downstream
        if (out_ready) begin
          state_d = IDLE; // go back to idle..
          if (in_valid_q && unit_ready) begin // ..unless new work comes in
            in_ready = 1'b1; // acknowledge the new transaction
            state_d  = BUSY; // will be busy with the next instruction
          end
        end
      end
      // fall into idle state otherwise
      default: state_d = IDLE;
    endcase

    // Flushing overrides the other actions
    if (flush_i) begin
      unit_busy = 1'b0; // data is invalidated
      out_valid = 1'b0; // cancel any valid data
      state_d   = IDLE; // go to default state
    end
  end

  // FSM status register (asynch active low reset)
  `FF(state_q, state_d, IDLE)

  // Hold additional information while the operation is in progress
  logic   result_is_fp8_q;
  TagType result_tag_q;
  logic   result_mask_q;
  AuxType result_aux_q;

  // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst)
  // Reset values of the type-parameterised registers are cast to their type, matching the
  // pipeline registers above/below, so the module also elaborates when TagType/AuxType are
  // overridden with types that do not accept an untyped '0 (e.g. enums).
  `FFL(result_is_fp8_q, input_is_fp8,                  op_starting, '0)
  `FFL(result_tag_q,    inp_pipe_tag_q[NUM_INP_REGS],  op_starting, TagType'('0))
  `FFL(result_mask_q,   inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0)
  `FFL(result_aux_q,    inp_pipe_aux_q[NUM_INP_REGS],  op_starting, AuxType'('0))

  // -----------------
  // DIVSQRT instance
  // -----------------
  logic [63:0]        unit_result;
  logic [WIDTH-1:0]   adjusted_result, held_result_q;
  fpnew_pkg::status_t unit_status, held_status_q;
  logic               hold_en;

  div_sqrt_top_mvp i_divsqrt_lei (
    .Clk_CI           ( clk_i               ),
    .Rst_RBI          ( rst_ni              ),
    .Div_start_SI     ( div_valid           ),
    .Sqrt_start_SI    ( sqrt_valid          ),
    .Operand_a_DI     ( divsqrt_operands[0] ),
    .Operand_b_DI     ( divsqrt_operands[1] ),
    .RM_SI            ( rnd_mode_q          ),
    .Precision_ctl_SI ( '0                  ),
    .Format_sel_SI    ( divsqrt_fmt         ),
    .Kill_SI          ( flush_i             ),
    .Result_DO        ( unit_result         ),
    .Fflags_SO        ( unit_status         ),
    .Ready_SO         ( unit_ready          ),
    .Done_SO          ( unit_done           )
  );

  // Adjust result width and fix FP8
  assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;

  // Hold the result when one lane has finished execution, except when all the lanes finish together
  // and the result can be accepted downstream
  assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready);
  // The Hold register (load, no reset)
  `FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
  `FFLNR(held_status_q, unit_status,     hold_en, clk_i)

  // --------------
  // Output Select
  // --------------
  logic [WIDTH-1:0]   result_d;
  fpnew_pkg::status_t status_d;
  // Prioritize hold register data
  assign result_d = unit_done_q ? held_result_q : adjusted_result;
  assign status_d = unit_done_q ? held_status_q : unit_status;

  // ----------------
  // Output Pipeline
  // ----------------
  // Output pipeline signals, index i holds signal after i register stages
  logic               [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
  fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
  TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
  logic               [0:NUM_OUT_REGS]            out_pipe_mask_q;
  AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
  logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
  // Ready signal is combinatorial for all stages
  logic [0:NUM_OUT_REGS] out_pipe_ready;

  // Input stage: First element of pipeline is taken from inputs
  assign out_pipe_result_q[0] = result_d;
  assign out_pipe_status_q[0] = status_d;
  assign out_pipe_tag_q[0]    = result_tag_q;
  assign out_pipe_mask_q[0]   = result_mask_q;
  assign out_pipe_aux_q[0]    = result_aux_q;
  assign out_pipe_valid_q[0]  = out_valid;
  // Input stage: Propagate pipeline ready signal to inside pipe
  assign out_ready = out_pipe_ready[0];
  // Generate the register stages
  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
    // Internal register enable for this stage
    logic reg_ena;
    // Determine the ready signal of the current stage - advance the pipeline:
    // 1. if the next stage is ready for our data
    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
    // Valid: enabled by ready signal, synchronous clear with the flush signal
    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
    // Enable register if pipeline ready and a valid data item is present
    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
    // Generate the pipeline registers within the stages, use enable-registers
    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
    `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
    `FFL(out_pipe_mask_q[i+1],   out_pipe_mask_q[i],   reg_ena, '0)
    `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
  end
  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
  // Output stage: assign module outputs
  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
  assign extension_bit_o = 1'b1; // always NaN-Box result
  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
  // Busy whenever any input stage, the unit FSM, or any output stage holds valid data
  assign busy_o          = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q});
endmodule
|
690
vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
vendored
690
vendor/openhwgroup/cvfpu/src/fpnew_fma.sv
vendored
|
@ -1,690 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
module fpnew_fma #(
|
||||
parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
|
||||
parameter int unsigned NumPipeRegs = 0,
|
||||
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
|
||||
parameter type TagType = logic,
|
||||
parameter type AuxType = logic,
|
||||
|
||||
localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// Input signals
|
||||
input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
|
||||
input logic [2:0] is_boxed_i, // 3 operands
|
||||
input fpnew_pkg::roundmode_e rnd_mode_i,
|
||||
input fpnew_pkg::operation_e op_i,
|
||||
input logic op_mod_i,
|
||||
input TagType tag_i,
|
||||
input logic mask_i,
|
||||
input AuxType aux_i,
|
||||
// Input Handshake
|
||||
input logic in_valid_i,
|
||||
output logic in_ready_o,
|
||||
input logic flush_i,
|
||||
// Output signals
|
||||
output logic [WIDTH-1:0] result_o,
|
||||
output fpnew_pkg::status_t status_o,
|
||||
output logic extension_bit_o,
|
||||
output TagType tag_o,
|
||||
output logic mask_o,
|
||||
output AuxType aux_o,
|
||||
// Output handshake
|
||||
output logic out_valid_o,
|
||||
input logic out_ready_i,
|
||||
// Indication of valid data in flight
|
||||
output logic busy_o
|
||||
);
|
||||
|
||||
// ----------
|
||||
// Constants
|
||||
// ----------
|
||||
localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
|
||||
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
|
||||
localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat);
|
||||
// Precision bits 'p' include the implicit bit
|
||||
localparam int unsigned PRECISION_BITS = MAN_BITS + 1;
|
||||
// The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
|
||||
localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
|
||||
localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
|
||||
// Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
|
||||
// datapath leakage. This is either given by the exponent bits or the width of the LZC result.
|
||||
// In most reasonable FP formats the internal exponent will be wider than the LZC result.
|
||||
localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH));
|
||||
// Shift amount width: maximum internal mantissa size is 3p+4 bits
|
||||
localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
|
||||
// Pipelines
|
||||
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? (NumPipeRegs / 3) // Last to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
|
||||
// ----------------
|
||||
// Type definition
|
||||
// ----------------
|
||||
typedef struct packed {
|
||||
logic sign;
|
||||
logic [EXP_BITS-1:0] exponent;
|
||||
logic [MAN_BITS-1:0] mantissa;
|
||||
} fp_t;
|
||||
|
||||
// ---------------
|
||||
// Input pipeline
|
||||
// ---------------
|
||||
// Input pipeline signals, index i holds signal after i register stages
|
||||
logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
|
||||
logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q;
|
||||
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
|
||||
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
|
||||
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
|
||||
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_INP_REGS] inp_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from inputs
|
||||
assign inp_pipe_operands_q[0] = operands_i;
|
||||
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
|
||||
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
|
||||
assign inp_pipe_op_q[0] = op_i;
|
||||
assign inp_pipe_op_mod_q[0] = op_mod_i;
|
||||
assign inp_pipe_tag_q[0] = tag_i;
|
||||
assign inp_pipe_mask_q[0] = mask_i;
|
||||
assign inp_pipe_aux_q[0] = aux_i;
|
||||
assign inp_pipe_valid_q[0] = in_valid_i;
|
||||
// Input stage: Propagate pipeline ready signal to upstream circuitry
|
||||
assign in_ready_o = inp_pipe_ready[0];
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
|
||||
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
|
||||
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
|
||||
// -----------------
|
||||
// Input processing
|
||||
// -----------------
|
||||
fpnew_pkg::fp_info_t [2:0] info_q;
|
||||
|
||||
// Classify input
|
||||
fpnew_classifier #(
|
||||
.FpFormat ( FpFormat ),
|
||||
.NumOperands ( 3 )
|
||||
) i_class_inputs (
|
||||
.operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
|
||||
.is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
|
||||
.info_o ( info_q )
|
||||
);
|
||||
|
||||
fp_t operand_a, operand_b, operand_c;
|
||||
fpnew_pkg::fp_info_t info_a, info_b, info_c;
|
||||
|
||||
// Operation selection and operand adjustment
|
||||
// | \c op_q | \c op_mod_q | Operation Adjustment
|
||||
// |:--------:|:-----------:|---------------------
|
||||
// | FMADD | \c 0 | FMADD: none
|
||||
// | FMADD | \c 1 | FMSUB: Invert sign of operand C
|
||||
// | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A
|
||||
// | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C
|
||||
// | ADD | \c 0 | ADD: Set operand A to +1.0
|
||||
// | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C
|
||||
// | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
|
||||
// | *others* | \c - | *invalid*
|
||||
// \note \c op_mod_q always inverts the sign of the addend.
|
||||
always_comb begin : op_select
|
||||
|
||||
// Default assignments - packing-order-agnostic
|
||||
operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
|
||||
operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
|
||||
operand_c = inp_pipe_operands_q[NUM_INP_REGS][2];
|
||||
info_a = info_q[0];
|
||||
info_b = info_q[1];
|
||||
info_c = info_q[2];
|
||||
|
||||
// op_mod_q inverts sign of operand C
|
||||
operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
|
||||
|
||||
unique case (inp_pipe_op_q[NUM_INP_REGS])
|
||||
fpnew_pkg::FMADD: ; // do nothing
|
||||
fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
|
||||
fpnew_pkg::ADD: begin // Set multiplicand to +1
|
||||
operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0};
|
||||
info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value.
|
||||
end
|
||||
fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN
|
||||
if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
|
||||
operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
|
||||
else
|
||||
operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
|
||||
info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value.
|
||||
end
|
||||
default: begin // propagate don't cares
|
||||
operand_a = '{default: fpnew_pkg::DONT_CARE};
|
||||
operand_b = '{default: fpnew_pkg::DONT_CARE};
|
||||
operand_c = '{default: fpnew_pkg::DONT_CARE};
|
||||
info_a = '{default: fpnew_pkg::DONT_CARE};
|
||||
info_b = '{default: fpnew_pkg::DONT_CARE};
|
||||
info_c = '{default: fpnew_pkg::DONT_CARE};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// ---------------------
|
||||
// Input classification
|
||||
// ---------------------
|
||||
logic any_operand_inf;
|
||||
logic any_operand_nan;
|
||||
logic signalling_nan;
|
||||
logic effective_subtraction;
|
||||
logic tentative_sign;
|
||||
|
||||
// Reduction for special case handling
|
||||
assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
|
||||
assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
|
||||
assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
|
||||
// Effective subtraction in FMA occurs when product and addend signs differ
|
||||
assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
|
||||
// The tentative sign of the FMA shall be the sign of the product
|
||||
assign tentative_sign = operand_a.sign ^ operand_b.sign;
|
||||
|
||||
// ----------------------
// Special case handling
// ----------------------
// Result and flags used when the regular datapath is bypassed
fp_t                special_result;
fpnew_pkg::status_t special_status;
logic               result_is_special;

always_comb begin : special_cases
  // Defaults: take the regular path, raise no flags, and preload the canonical quiet NaN
  result_is_special = 1'b0;
  special_status    = '0;
  special_result    = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};

  // (inf * 0) + c and (0 * inf) + c are checked first so that they are INVALID regardless of c,
  // even when c is a quiet NaN (behavior mandated by RISC-V according to the original comment).
  if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
    result_is_special = 1'b1; // output stays the canonical qNaN
    special_status.NV = 1'b1; // invalid operation
  end else if (any_operand_nan) begin
    // Any NaN input yields the canonical qNaN; NV is raised only for signalling NaNs
    result_is_special = 1'b1;
    special_status.NV = signalling_nan;
  end else if (any_operand_inf) begin
    // Infinity inputs never go through the regular datapath
    result_is_special = 1'b1;
    if (info_a.is_inf || info_b.is_inf) begin
      if (info_c.is_inf && effective_subtraction) begin
        // Infinite product and infinite addend of opposite sign: invalid, output stays qNaN
        special_status.NV = 1'b1;
      end else begin
        // Infinite product dominates: infinity with the sign of the product
        special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0};
      end
    end else if (info_c.is_inf) begin
      // Only the addend is infinite: infinity with the sign of the addend (operand_c)
      special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0};
    end
  end
end
|
||||
|
||||
// ---------------------------
// Initial exponent data path
// ---------------------------
// All internal exponents live in signed containers of EXP_WIDTH bits
logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
logic signed [EXP_WIDTH-1:0] tentative_exponent;

// A leading zero bit keeps the encoded exponents non-negative once they are cast to signed;
// the remaining extension to EXP_WIDTH happens implicitly in the assignment.
assign exponent_a = signed'({1'b0, operand_a.exponent});
assign exponent_b = signed'({1'b0, operand_b.exponent});
assign exponent_c = signed'({1'b0, operand_c.exponent});

// Internal exponents stay biased. Per the original note, the real exponent is
// ex = Ex - bias + 1 - nx, with Ex the encoded exponent and nx the implicit bit.
// Addend: the encoded exponent is incremented by one whenever operand c is not flagged normal.
assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal}));

// Product: sum of both encoded exponents (each corrected by its subnormal flag) minus the bias.
// A zero multiplicand forces the fixed value 2 - BIAS, used as the minimum product exponent.
assign exponent_product = (info_a.is_zero || info_b.is_zero)
                          ? 2 - signed'(BIAS)
                          : signed'(exponent_a + info_a.is_subnormal
                                    + exponent_b + info_b.is_subnormal
                                    - signed'(BIAS));

// Difference is taken as addend minus product
assign exponent_difference = exponent_addend - exponent_product;

// The larger of the two exponents becomes the tentative result exponent
assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
|
||||
|
||||
// Right-shift distance applied to the addend; unsigned because the addend is only shifted right
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;

always_comb begin : addend_shift_amount
  // Default: addend-anchored case, no shift (the product only contributes to the sticky bit)
  addend_shamt = 0;

  if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin
    // Product-anchored case: saturate the shift so the addend ends up only in the sticky bits
    addend_shamt = 3 * PRECISION_BITS + 4;
  end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin
    // Overlapping case: addend and product share bit positions that must be added
    addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
  end
end
|
||||
|
||||
// ------------------
// Product data path
// ------------------
// Mantissae with the implicit bit prepended (p = PRECISION_BITS wide)
logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
// A p-by-p multiplication produces a 2p-bit product
logic [2*PRECISION_BITS-1:0] product;
// The adder operands are 3p+4 bits wide
logic [3*PRECISION_BITS+3:0] product_shifted;

// The implicit bit is the operand's is_normal flag
assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
assign mantissa_c = {info_c.is_normal, operand_c.mantissa};

// Mantissa multiplier (a*b)
assign product = mantissa_a * mantissa_b;

// Position the product inside the 3p+4 bit vector, leaving two low bits for round and sticky:
//   | 000...000 | product | RS |
//    <-  p+2  -> <- 2p  -> <2>
assign product_shifted = product << 2; // constant shift
|
||||
|
||||
// -----------------
// Addend data path
// -----------------
logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits of the shifted addend
logic [PRECISION_BITS-1:0]   addend_sticky_bits; // up to p bits that fall off the low end
logic                        sticky_before_add;  // OR of the bits that fell off
logic [3*PRECISION_BITS+3:0] addend_shifted;     // adder operand, possibly inverted
logic                        inject_carry_in;    // carry-in used with the inverted addend

// The addend is aligned to the product by a right shift of addend_shamt positions.
// BEFORE THE SHIFT:
//   | mantissa_c | 000..000 |
//    <-   p    -> <- 3p+4 ->
// AFTER THE SHIFT:
//   | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
//    <- addend_shamt -> <-   p    -> <- 2p+4-addend_shamt -> <-  up to p  ->
assign {addend_after_shift, addend_sticky_bits} =
    (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;

// Compress everything that was shifted out into a single sticky bit.
// (The original code carries a disabled alternative here that would have written the sticky
// bit into addend_after_shift[0]; it remains disabled.)
assign sticky_before_add = (| addend_sticky_bits);

// For an effective subtraction the addend is inverted; the carry-in is injected only when
// no sticky bits were shifted out.
assign addend_shifted  = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
assign inject_carry_in = effective_subtraction & ~sticky_before_add;
|
||||
|
||||
// ------
// Adder
// ------
logic [3*PRECISION_BITS+4:0] sum_raw;    // one extra bit on top to capture the carry
logic                        sum_carry;  // carry out of the addition, used for sign fixing
logic [3*PRECISION_BITS+3:0] sum;        // magnitude of the sum, carry dropped
logic                        final_sign;

// Mantissa adder (a*b + c). Per the original note, a true addition cannot overflow here.
assign sum_raw   = product_shifted + addend_shifted + inject_carry_in;
assign sum_carry = sum_raw[3*PRECISION_BITS+4];

// An effective subtraction without carry-out is treated as a negative sum and negated;
// in every other case the raw sum is used (truncated to the width of sum).
assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;

// Sign selection:
//  - effective addition: the tentative (product) sign is kept
//  - effective subtraction: the result is negative exactly when the carry equals the
//    tentative sign, positive otherwise
assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
                    ? 1'b1
                    : (effective_subtraction ? 1'b0 : tentative_sign);
|
||||
|
||||
// ------------------
// Internal pipeline
// ------------------
// Values leaving the last internal stage, exposed as plain (non-array) signals
logic                          effective_subtraction_q;
logic signed [EXP_WIDTH-1:0]   exponent_product_q;
logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
logic                          sticky_before_add_q;
logic [3*PRECISION_BITS+3:0]   sum_q;
logic                          final_sign_q;
fpnew_pkg::roundmode_e         rnd_mode_q;
logic                          result_is_special_q;
fp_t                           special_result_q;
fpnew_pkg::status_t            special_status_q;

// Per-stage storage: element i holds the value after i register stages
logic                   [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
logic signed            [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
logic signed            [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
logic signed            [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
logic                   [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
logic                   [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
logic                   [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
logic                   [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
fpnew_pkg::roundmode_e  [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
logic                   [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
fp_t                    [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
fpnew_pkg::status_t     [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
TagType                 [0:NUM_MID_REGS]                         mid_pipe_tag_q;
logic                   [0:NUM_MID_REGS]                         mid_pipe_mask_q;
AuxType                 [0:NUM_MID_REGS]                         mid_pipe_aux_q;
logic                   [0:NUM_MID_REGS]                         mid_pipe_valid_q;
// Ready is purely combinational through all stages
logic                   [0:NUM_MID_REGS]                         mid_pipe_ready;

// Stage 0 is fed directly by the upstream combinational logic and the input pipeline
assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
assign mid_pipe_exp_prod_q[0]    = exponent_product;
assign mid_pipe_exp_diff_q[0]    = exponent_difference;
assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
assign mid_pipe_add_shamt_q[0]   = addend_shamt;
assign mid_pipe_sticky_q[0]      = sticky_before_add;
assign mid_pipe_sum_q[0]         = sum;
assign mid_pipe_final_sign_q[0]  = final_sign;
assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_res_is_spec_q[0] = result_is_special;
assign mid_pipe_spec_res_q[0]    = special_result;
assign mid_pipe_spec_stat_q[0]   = special_status;
assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
assign mid_pipe_mask_q[0]        = inp_pipe_mask_q[NUM_INP_REGS];
assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
assign mid_pipe_valid_q[0]       = inp_pipe_valid_q[NUM_INP_REGS];
// Back-pressure: the input pipeline sees the ready of internal stage 0
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];

// One register stage per iteration
for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
  // Data register enable of this stage
  logic reg_ena;
  // A stage may advance when the next stage holds a bubble (not valid) or is itself ready
  assign mid_pipe_ready[i] = ~mid_pipe_valid_q[i+1] | mid_pipe_ready[i+1];
  // Valid bit: loaded when this stage is ready, cleared by flush_i (see FFLARNC in registers.svh)
  `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
  // Data registers only capture when a valid item actually moves forward
  assign reg_ena = mid_pipe_valid_q[i] & mid_pipe_ready[i];
  // Enable-registers for the payload of this stage
  `FFL(mid_pipe_eff_sub_q[i+1],     mid_pipe_eff_sub_q[i],     reg_ena, '0)
  `FFL(mid_pipe_exp_prod_q[i+1],    mid_pipe_exp_prod_q[i],    reg_ena, '0)
  `FFL(mid_pipe_exp_diff_q[i+1],    mid_pipe_exp_diff_q[i],    reg_ena, '0)
  `FFL(mid_pipe_tent_exp_q[i+1],    mid_pipe_tent_exp_q[i],    reg_ena, '0)
  `FFL(mid_pipe_add_shamt_q[i+1],   mid_pipe_add_shamt_q[i],   reg_ena, '0)
  `FFL(mid_pipe_sticky_q[i+1],      mid_pipe_sticky_q[i],      reg_ena, '0)
  `FFL(mid_pipe_sum_q[i+1],         mid_pipe_sum_q[i],         reg_ena, '0)
  `FFL(mid_pipe_final_sign_q[i+1],  mid_pipe_final_sign_q[i],  reg_ena, '0)
  `FFL(mid_pipe_rnd_mode_q[i+1],    mid_pipe_rnd_mode_q[i],    reg_ena, fpnew_pkg::RNE)
  `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
  `FFL(mid_pipe_spec_res_q[i+1],    mid_pipe_spec_res_q[i],    reg_ena, '0)
  `FFL(mid_pipe_spec_stat_q[i+1],   mid_pipe_spec_stat_q[i],   reg_ena, '0)
  `FFL(mid_pipe_tag_q[i+1],         mid_pipe_tag_q[i],         reg_ena, TagType'('0))
  `FFL(mid_pipe_mask_q[i+1],        mid_pipe_mask_q[i],        reg_ena, '0)
  `FFL(mid_pipe_aux_q[i+1],         mid_pipe_aux_q[i],         reg_ena, AuxType'('0))
end

// Tap the last stage for the normalization and rounding logic that follows
assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
|
||||
|
||||
// --------------
// Normalization
// --------------
logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower 2p+3 bits of the sum
logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // leading zeroes found in sum_lower
logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // same count in a signed container
logic                               lzc_zeroes;             // set when sum_lower holds no one

logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt;           // left-shift distance for the sum
logic signed [EXP_WIDTH-1:0]          normalized_exponent;

logic [3*PRECISION_BITS+4:0] sum_shifted;     // sum after the large normalization shift
logic [PRECISION_BITS:0]     final_mantissa;  // mantissa before rounding, bit 0 is the round bit
logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // the 2p+3 bits below the mantissa
logic                        sticky_after_norm;

logic signed [EXP_WIDTH-1:0] final_exponent;

assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];

// Leading-zero counter used for product-anchored results and cancellations
lzc #(
  .WIDTH ( LOWER_SUM_WIDTH ),
  .MODE  ( 1               ) // MODE = 1 counts leading zeroes
) i_lzc (
  .in_i    ( sum_lower          ),
  .cnt_o   ( leading_zero_count ),
  .empty_o ( lzc_zeroes         )
);

// Zero-extend before the signed cast so the count is never interpreted as negative
assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});

// Left-shift distance and matching exponent, derived from the exponents and the LZC result
always_comb begin : norm_shift_amount
  if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
    // Product-anchored case or possible cancellation: the LZC result is needed
    if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
      // Normal result: undo the initial product placement and strip the counted zeroes
      norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
      normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
    end else begin
      // Subnormal result: cap the shift so the mantissa lines up with the minimum exponent
      norm_shamt          = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q);
      normalized_exponent = 0; // subnormals are encoded with exponent 0
    end
  end else begin
    // Addend-anchored case: simply undo the initial addend shift
    norm_shamt          = addend_shamt_q;
    normalized_exponent = tentative_exponent_q;
  end
end

// Large normalization shift
assign sum_shifted = sum_q << norm_shamt;

// One-bit correction: after the large shift the leading one may sit one position to the left
// (carry bit) or to the right of the non-carry MSB of the sum.
always_comb begin : small_norm
  // Default: take the shifted sum as is (the carry bit is dropped by the assignment)
  final_exponent                    = normalized_exponent;
  {final_mantissa, sum_sticky_bits} = sum_shifted;

  if (sum_shifted[3*PRECISION_BITS+4]) begin
    // Carry bit set: the sum overflowed, align right and bump the exponent
    {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
    final_exponent                    = normalized_exponent + 1;
  end else if (sum_shifted[3*PRECISION_BITS+3]) begin
    // MSB set: already normalized, keep the defaults
  end else if (normalized_exponent > 1) begin
    // Leading one is one position low and the exponent still has room: align left
    {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
    final_exponent                    = normalized_exponent - 1;
  end else begin
    // No room left in the exponent: the result is denormal
    final_exponent = '0;
  end
end

// Merge the bits shifted out here with the sticky bit collected before the addition
assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
|
||||
|
||||
// ----------------------------
// Rounding and classification
// ----------------------------
logic                         pre_round_sign;
logic [EXP_BITS-1:0]          pre_round_exponent;
logic [MAN_BITS-1:0]          pre_round_mantissa;
logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs;     // magnitude handed to the rounding unit
logic [1:0]                   round_sticky_bits; // {round bit, sticky bit}

logic of_before_round, of_after_round; // overflow
logic uf_before_round, uf_after_round; // underflow
logic result_zero;

logic                         rounded_sign;
logic [EXP_BITS+MAN_BITS-1:0] rounded_abs;       // magnitude returned by the rounding unit

// Classification before rounding. The original note stresses that RISC-V requires underflow
// to be checked AFTER rounding, which is done further below.
assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // all-ones exponent encodes infinity
assign uf_before_round = final_exponent == 0;               // subnormal exponents are capped to 0

// Pre-rounding value. On overflow the largest normal magnitude is substituted
// (exponent all ones minus one, mantissa all ones).
assign pre_round_sign     = final_sign_q;
assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]);
assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit
assign pre_round_abs      = {pre_round_exponent, pre_round_mantissa};

// On overflow both round and sticky are forced high so the rounding unit sees an inexact value
assign round_sticky_bits = (of_before_round) ? 2'b11 : {final_mantissa[0], sticky_after_norm};

// Rounding unit
fpnew_rounding #(
  .AbsWidth ( EXP_BITS + MAN_BITS )
) i_fpnew_rounding (
  .abs_value_i             ( pre_round_abs           ),
  .sign_i                  ( pre_round_sign          ),
  .round_sticky_bits_i     ( round_sticky_bits       ),
  .rnd_mode_i              ( rnd_mode_q              ),
  .effective_subtraction_i ( effective_subtraction_q ),
  .abs_rounded_o           ( rounded_abs             ),
  .sign_o                  ( rounded_sign            ),
  .exact_zero_o            ( result_zero             )
);

// Classification after rounding, based on the exponent field of the rounded magnitude
assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0
assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones
|
||||
|
||||
// -----------------
// Result selection
// -----------------
// Result and flags of the regular (non-special) path
logic [WIDTH-1:0]   regular_result;
fpnew_pkg::status_t regular_status;

assign regular_result = {rounded_sign, rounded_abs};

// Status flags of the regular path
assign regular_status.NV = 1'b0; // invalid cases never take the regular path
assign regular_status.DZ = 1'b0; // there is no division here
assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF

// Values entering the output pipeline
fp_t                result_d;
fpnew_pkg::status_t status_d;

// The special-case bypass wins over the regular datapath
assign result_d = result_is_special_q ? special_result_q : regular_result;
assign status_d = result_is_special_q ? special_status_q : regular_status;
|
||||
|
||||
// ----------------
// Output Pipeline
// ----------------
// Per-stage storage: element i holds the value after i register stages
fp_t                [0:NUM_OUT_REGS] out_pipe_result_q;
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
TagType             [0:NUM_OUT_REGS] out_pipe_tag_q;
logic               [0:NUM_OUT_REGS] out_pipe_mask_q;
AuxType             [0:NUM_OUT_REGS] out_pipe_aux_q;
logic               [0:NUM_OUT_REGS] out_pipe_valid_q;
// Ready is purely combinational through all stages
logic               [0:NUM_OUT_REGS] out_pipe_ready;

// Stage 0 is fed by the result selection and the last internal pipeline stage
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_tag_q[0]    = mid_pipe_tag_q[NUM_MID_REGS];
assign out_pipe_mask_q[0]   = mid_pipe_mask_q[NUM_MID_REGS];
assign out_pipe_aux_q[0]    = mid_pipe_aux_q[NUM_MID_REGS];
assign out_pipe_valid_q[0]  = mid_pipe_valid_q[NUM_MID_REGS];
// Back-pressure: the internal pipeline sees the ready of output stage 0
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];

// One register stage per iteration
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
  // Data register enable of this stage
  logic reg_ena;
  // A stage may advance when the next stage holds a bubble (not valid) or is itself ready
  assign out_pipe_ready[i] = ~out_pipe_valid_q[i+1] | out_pipe_ready[i+1];
  // Valid bit: loaded when this stage is ready, cleared by flush_i (see FFLARNC in registers.svh)
  `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
  // Data registers only capture when a valid item actually moves forward
  assign reg_ena = out_pipe_valid_q[i] & out_pipe_ready[i];
  // Enable-registers for the payload of this stage
  `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
  `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
  `FFL(out_pipe_tag_q[i+1],    out_pipe_tag_q[i],    reg_ena, TagType'('0))
  `FFL(out_pipe_mask_q[i+1],   out_pipe_mask_q[i],   reg_ena, '0)
  `FFL(out_pipe_aux_q[i+1],    out_pipe_aux_q[i],    reg_ena, AuxType'('0))
end

// The downstream consumer drives the ready of the last stage
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;

// Module outputs are taken from the last stage
assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
assign extension_bit_o = 1'b1; // always NaN-Box result
assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
// Busy while any valid bit is set anywhere in the input, internal or output pipeline
assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
|
||||
endmodule
|
839
vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
vendored
839
vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv
vendored
|
@ -1,839 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
module fpnew_fma_multi #(
|
||||
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
|
||||
parameter int unsigned NumPipeRegs = 0,
|
||||
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
|
||||
parameter type TagType = logic,
|
||||
parameter type AuxType = logic,
|
||||
// Do not change
|
||||
localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig),
|
||||
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
// Input signals
|
||||
input logic [2:0][WIDTH-1:0] operands_i, // 3 operands
|
||||
input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands
|
||||
input fpnew_pkg::roundmode_e rnd_mode_i,
|
||||
input fpnew_pkg::operation_e op_i,
|
||||
input logic op_mod_i,
|
||||
input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands
|
||||
input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result
|
||||
input TagType tag_i,
|
||||
input logic mask_i,
|
||||
input AuxType aux_i,
|
||||
// Input Handshake
|
||||
input logic in_valid_i,
|
||||
output logic in_ready_o,
|
||||
input logic flush_i,
|
||||
// Output signals
|
||||
output logic [WIDTH-1:0] result_o,
|
||||
output fpnew_pkg::status_t status_o,
|
||||
output logic extension_bit_o,
|
||||
output TagType tag_o,
|
||||
output logic mask_o,
|
||||
output AuxType aux_o,
|
||||
// Output handshake
|
||||
output logic out_valid_o,
|
||||
input logic out_ready_i,
|
||||
// Indication of valid data in flight
|
||||
output logic busy_o
|
||||
);
|
||||
|
||||
// ----------
|
||||
// Constants
|
||||
// ----------
|
||||
// The super-format that can hold all formats
|
||||
localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig);
|
||||
|
||||
localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits;
|
||||
localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits;
|
||||
|
||||
// Precision bits 'p' include the implicit bit
|
||||
localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1;
|
||||
// The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection
|
||||
localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3;
|
||||
localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH);
|
||||
// Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid
|
||||
// datapath leakage. This is either given by the exponent bits or the width of the LZC result.
|
||||
// In most reasonable FP formats the internal exponent will be wider than the LZC result.
|
||||
localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH);
|
||||
// Shift amount width: maximum internal mantissa size is 3p+4 bits
|
||||
localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5);
|
||||
// Pipelines
|
||||
localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 1) / 3) // Second to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? ((NumPipeRegs + 2) / 3) // First to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
|
||||
? NumPipeRegs
|
||||
: (PipeConfig == fpnew_pkg::DISTRIBUTED
|
||||
? (NumPipeRegs / 3) // Last to get distributed regs
|
||||
: 0); // no regs here otherwise
|
||||
|
||||
// ----------------
|
||||
// Type definition
|
||||
// ----------------
|
||||
typedef struct packed {
|
||||
logic sign;
|
||||
logic [SUPER_EXP_BITS-1:0] exponent;
|
||||
logic [SUPER_MAN_BITS-1:0] mantissa;
|
||||
} fp_t;
|
||||
|
||||
// ---------------
|
||||
// Input pipeline
|
||||
// ---------------
|
||||
// Selected pipeline output signals as non-arrays
|
||||
logic [2:0][WIDTH-1:0] operands_q;
|
||||
fpnew_pkg::fp_format_e src_fmt_q;
|
||||
fpnew_pkg::fp_format_e dst_fmt_q;
|
||||
|
||||
// Input pipeline signals, index i holds signal after i register stages
|
||||
logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q;
|
||||
logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q;
|
||||
fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
|
||||
fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
|
||||
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
|
||||
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
|
||||
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
|
||||
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_INP_REGS] inp_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from inputs
|
||||
assign inp_pipe_operands_q[0] = operands_i;
|
||||
assign inp_pipe_is_boxed_q[0] = is_boxed_i;
|
||||
assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
|
||||
assign inp_pipe_op_q[0] = op_i;
|
||||
assign inp_pipe_op_mod_q[0] = op_mod_i;
|
||||
assign inp_pipe_src_fmt_q[0] = src_fmt_i;
|
||||
assign inp_pipe_dst_fmt_q[0] = dst_fmt_i;
|
||||
assign inp_pipe_tag_q[0] = tag_i;
|
||||
assign inp_pipe_mask_q[0] = mask_i;
|
||||
assign inp_pipe_aux_q[0] = aux_i;
|
||||
assign inp_pipe_valid_q[0] = in_valid_i;
|
||||
// Input stage: Propagate pipeline ready signal to upstream circuitry
|
||||
assign in_ready_o = inp_pipe_ready[0];
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
|
||||
`FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
|
||||
`FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
|
||||
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
// Tap the last input-pipeline slot: formats first, then the operand bundle
assign src_fmt_q  = inp_pipe_src_fmt_q[NUM_INP_REGS];
assign dst_fmt_q  = inp_pipe_dst_fmt_q[NUM_INP_REGS];
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
|
||||
|
||||
// -----------------
// Input processing
// -----------------
// Per-format view of the three operands, stored in SUPER_EXP_BITS / SUPER_MAN_BITS wide fields
logic        [NUM_FORMATS-1:0][2:0]                     fmt_sign;
logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent;
logic        [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa;

// Classifier output for every format / operand pair
fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q;

// Unpack and classify the operands once per format
for (genvar fmt_idx = 0; fmt_idx < int'(NUM_FORMATS); fmt_idx++) begin : fmt_init_inputs
  // Field sizes of the format handled by this iteration
  localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt_idx));

  if (FpFmtConfig[fmt_idx]) begin : active_format
    // Operands cut down to the width of this format, as fed to the classifier
    logic [2:0][FP_WIDTH-1:0] trimmed_ops;

    for (genvar opnd = 0; opnd < 3; opnd++) begin : gen_operands
      assign trimmed_ops[opnd]           = operands_q[opnd][FP_WIDTH-1:0];
      assign fmt_sign[fmt_idx][opnd]     = operands_q[opnd][FP_WIDTH-1];
      // Encoded exponent field with a leading zero, cast to signed
      assign fmt_exponent[fmt_idx][opnd] = signed'({1'b0, operands_q[opnd][MAN_BITS+:EXP_BITS]});
      // Shift the mantissa field towards the MSB end of the super-format mantissa
      assign fmt_mantissa[fmt_idx][opnd] = {info_q[fmt_idx][opnd].is_normal, operands_q[opnd][MAN_BITS-1:0]} <<
                                           (SUPER_MAN_BITS - MAN_BITS);
    end

    // Classify the three trimmed operands of this format
    fpnew_classifier #(
      .FpFormat    ( fpnew_pkg::fp_format_e'(fmt_idx) ),
      .NumOperands ( 3                                )
    ) i_fpnew_classifier (
      .operands_i ( trimmed_ops                                ),
      .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt_idx] ),
      .info_o     ( info_q[fmt_idx]                            )
    );
  end else begin : inactive_format
    // Format disabled in FpFmtConfig: drive everything with don't-cares
    assign fmt_sign[fmt_idx]     = fpnew_pkg::DONT_CARE;
    assign fmt_exponent[fmt_idx] = '{default: fpnew_pkg::DONT_CARE};
    assign fmt_mantissa[fmt_idx] = '{default: fpnew_pkg::DONT_CARE};
    assign info_q[fmt_idx]       = '{default: fpnew_pkg::DONT_CARE};
  end
end
|
||||
|
||||
// Unpacked FMA operands and their classification, after operation-specific adjustment
fp_t                 operand_a, operand_b, operand_c;
fpnew_pkg::fp_info_t info_a,    info_b,    info_c;

// Operand adjustment per operation
// | \c op_q  | \c op_mod_q | Resulting operation and adjustment
// |:--------:|:-----------:|-----------------------------------
// | FMADD    | \c 0        | FMADD: operands used as they are
// | FMADD    | \c 1        | FMSUB: sign of operand C inverted
// | FNMSUB   | \c 0        | FNMSUB: sign of operand A inverted
// | FNMSUB   | \c 1        | FNMADD: signs of operands A and C inverted
// | ADD      | \c 0        | ADD: operand A replaced by +1.0
// | ADD      | \c 1        | SUB: operand A replaced by +1.0, sign of operand C inverted
// | MUL      | \c 0        | MUL: operand C replaced by +0.0 or -0.0 depending on the rounding mode
// | *others* | \c -        | *invalid*
// \note \c op_mod_q always inverts the sign of the addend.
always_comb begin : op_select

  // Defaults: A and B are read in the source format, C in the destination format
  info_a = info_q[src_fmt_q][0];
  info_b = info_q[src_fmt_q][1];
  info_c = info_q[dst_fmt_q][2];

  operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
  operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
  operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};

  // The operation modifier flips the sign of the addend
  operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];

  unique case (inp_pipe_op_q[NUM_INP_REGS])
    // Plain fused multiply-add: defaults are already right
    fpnew_pkg::FMADD: ;

    // Negated product: flip the sign of one multiplicand
    fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign;

    // Addition: multiply operand B by +1.0 (normal, boxed)
    fpnew_pkg::ADD: begin
      info_a    = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0};
      operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
    end

    // Multiplication: add a zero whose sign is 0 under RDN and 1 under every other mode
    fpnew_pkg::MUL: begin
      info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0};
      if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) begin
        operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
      end else begin
        operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
      end
    end

    // Unsupported operation: propagate don't-cares
    default: begin
      info_a    = '{default: fpnew_pkg::DONT_CARE};
      info_b    = '{default: fpnew_pkg::DONT_CARE};
      info_c    = '{default: fpnew_pkg::DONT_CARE};
      operand_a = '{default: fpnew_pkg::DONT_CARE};
      operand_b = '{default: fpnew_pkg::DONT_CARE};
      operand_c = '{default: fpnew_pkg::DONT_CARE};
    end
  endcase
end
|
||||
|
||||
// ---------------------
// Input classification
// ---------------------
logic any_operand_inf;
logic any_operand_nan;
logic signalling_nan;
logic effective_subtraction;
logic tentative_sign;

// Summaries over the three (adjusted) operands, used by the special-case logic
assign any_operand_inf = info_a.is_inf        | info_b.is_inf        | info_c.is_inf;
assign any_operand_nan = info_a.is_nan        | info_b.is_nan        | info_c.is_nan;
assign signalling_nan  = info_a.is_signalling | info_b.is_signalling | info_c.is_signalling;

// Sign of the product a*b; used as the first guess for the sign of the result
assign tentative_sign = operand_a.sign ^ operand_b.sign;

// The addition is effectively a subtraction when product and addend signs differ
assign effective_subtraction = tentative_sign ^ operand_c.sign;
|
||||
|
||||
// ----------------------
// Special case handling
// ----------------------
// Values selected for the destination format
logic [WIDTH-1:0]   special_result;
fpnew_pkg::status_t special_status;
logic               result_is_special;

// Per-format candidates
logic               [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
fpnew_pkg::status_t [NUM_FORMATS-1:0]            fmt_special_status;
logic               [NUM_FORMATS-1:0]            fmt_result_is_special;

for (genvar fmt_idx = 0; fmt_idx < int'(NUM_FORMATS); fmt_idx++) begin : gen_special_results
  // Field sizes of the format handled by this iteration
  localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt_idx));

  // Field encodings: all-ones exponent (shared by qNaN and infinity),
  // mantissa with only the MSB set (qNaN), all-zero mantissa (infinity)
  localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
  localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
  localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;

  if (FpFmtConfig[fmt_idx]) begin : active_format
    always_comb begin : special_results
      logic [FP_WIDTH-1:0] fmt_res;

      // Defaults: positive quiet NaN, no flags, regular datapath result is used
      fmt_res                        = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA};
      fmt_special_status[fmt_idx]    = '0;
      fmt_result_is_special[fmt_idx] = 1'b0;

      // inf * 0 or 0 * inf is checked first so that it wins over a NaN addend:
      // RISC-V mandates NV for (inf * 0) + c regardless of c, even for a quiet NaN.
      if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
        fmt_result_is_special[fmt_idx] = 1'b1; // bypass FMA, result stays the default qNaN
        fmt_special_status[fmt_idx].NV = 1'b1;

      // Any NaN operand: result stays the default qNaN, NV only for signalling NaNs
      end else if (any_operand_nan) begin
        fmt_result_is_special[fmt_idx] = 1'b1; // bypass FMA
        fmt_special_status[fmt_idx].NV = signalling_nan;

      // Remaining cases with an infinite operand
      end else if (any_operand_inf) begin
        fmt_result_is_special[fmt_idx] = 1'b1; // bypass FMA

        // Infinite product and infinite addend of opposite sign: invalid, qNaN default kept
        if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
          fmt_special_status[fmt_idx].NV = 1'b1;
        // Infinite product: infinity with the sign of the product
        else if (info_a.is_inf || info_b.is_inf) begin
          fmt_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
        // Infinite addend only: infinity with the sign of the addend (= operand_c)
        end else if (info_c.is_inf) begin
          fmt_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
        end
      end

      // Fill the container with ones (NaN-box), then drop the encoded value into the low bits
      fmt_special_result[fmt_idx]                = '1;
      fmt_special_result[fmt_idx][FP_WIDTH-1:0]  = fmt_res;
    end
  end else begin : inactive_format
    assign fmt_special_result[fmt_idx]    = '{default: fpnew_pkg::DONT_CARE};
    assign fmt_special_status[fmt_idx]    = '0;
    assign fmt_result_is_special[fmt_idx] = 1'b0;
  end
end

// Pick the candidates belonging to the destination format. The detection itself does
// not depend on the format, so every active format produces the same special flag.
assign result_is_special = fmt_result_is_special[dst_fmt_q];
assign special_status    = fmt_special_status[dst_fmt_q];
assign special_result    = fmt_special_result[dst_fmt_q];
|
||||
|
||||
// ---------------------------
// Initial exponent data path
// ---------------------------
logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
logic signed [EXP_WIDTH-1:0] tentative_exponent;

// Encoded exponents with a leading zero bit, placed into the wider signed containers
assign exponent_a = signed'({1'b0, operand_a.exponent});
assign exponent_b = signed'({1'b0, operand_b.exponent});
assign exponent_c = signed'({1'b0, operand_c.exponent});

// Internal exponents are derived from the encoded ones: real exponents are
// (ex = Ex - bias + 1 - nx), Ex being the encoded exponent and nx the implicit bit.
// Everything below is biased to the destination format.

// Addend: encoded exponent, plus one when the addend is not normal (0 as subnorm)
assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal}));

// Product: sum of both encoded exponents (each plus one for subnormals), minus twice the
// source bias, re-biased for the destination format. A zero product gets the minimum exponent.
assign exponent_product = (info_a.is_zero || info_b.is_zero)
                          ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
                          : signed'(exponent_a + info_a.is_subnormal
                                    + exponent_b + info_b.is_subnormal
                                    - 2*signed'(fpnew_pkg::bias(src_fmt_q))
                                    + signed'(fpnew_pkg::bias(dst_fmt_q)));

// Positive difference: the addend has the larger exponent
assign exponent_difference = exponent_addend - exponent_product;

// Continue with whichever exponent is larger
assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
|
||||
|
||||
// Right-shift distance applied to the addend (unsigned: the addend is only shifted right)
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;

always_comb begin : addend_shift_amount
  // Addend-anchored case: no shift, the product ends up only in the sticky bit
  addend_shamt = 0;

  if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) begin
    // Product-anchored case: saturate the shift, the addend ends up only in the sticky bit
    addend_shamt = 3 * PRECISION_BITS + 4;
  end else if (exponent_difference <= signed'(PRECISION_BITS + 2)) begin
    // Addend and product overlap and have mutual bits to add
    addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
  end
end
|
||||
|
||||
// ------------------
// Product data path
// ------------------
logic [PRECISION_BITS-1:0]   mantissa_a, mantissa_b, mantissa_c;
logic [2*PRECISION_BITS-1:0] product;         // p x p multiplication gives 2p bits
logic [3*PRECISION_BITS+3:0] product_shifted; // 3p+4 bit adder operand (including G/R)

// Prepend the implicit bit (set for normal numbers only) to each stored mantissa
assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
assign mantissa_c = {info_c.is_normal, operand_c.mantissa};

// Mantissa multiplier (a*b)
assign product = mantissa_a * mantissa_b;

// Place the product into the 3p+4 bit adder operand, leaving two low bits for round/sticky:
// | 000...000 | product | RS |
//  <-  p+2  -> <-  2p -> < 2>
assign product_shifted = product << 2; // constant shift
|
||||
|
||||
// -----------------
// Addend data path
// -----------------
logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits, carried on to the adder
logic [PRECISION_BITS-1:0]   addend_sticky_bits; // up to p bits that fall off the low end
logic                        sticky_before_add;  // the bits above, compressed into one
logic [3*PRECISION_BITS+3:0] addend_shifted;     // 3p+4 bit adder operand (including G/R)
logic                        inject_carry_in;    // carry-in used for subtractions

// The addend is right-shifted by addend_shamt. Up to p bits drop out at the low end and
// are collected for the sticky bit.
// BEFORE THE SHIFT:
// | mantissa_c | 000..000 |
//  <-    p   -> <- 3p+4 ->
// AFTER THE SHIFT:
// | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
//  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
assign {addend_after_shift, addend_sticky_bits} =
    (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;

// Any shifted-out bit sets the sticky bit
assign sticky_before_add = |addend_sticky_bits;

// Subtraction: add the bitwise inverse of the addend; the carry-in is only
// injected when no sticky bits were shifted out
assign inject_carry_in = effective_subtraction & ~sticky_before_add;
assign addend_shifted  = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
|
||||
|
||||
// ------
// Adder
// ------
logic [3*PRECISION_BITS+4:0] sum_raw;    // adder output, one extra MSB for the carry
logic                        sum_carry;  // carry bit of the adder output, used for sign fixing
logic [3*PRECISION_BITS+3:0] sum;        // value handed on, without the carry bit
logic                        final_sign;

// Mantissa adder (ab+c): shifted product + (possibly inverted) addend + carry-in
assign sum_raw   = product_shifted + addend_shifted + inject_carry_in;
assign sum_carry = sum_raw[3*PRECISION_BITS+4];

// A subtraction without carry-out produced a negative value: negate it
assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;

// Sign fix-up. For subtractions the result is negative exactly when the carry equals the
// tentative (product) sign; for additions the tentative sign is kept.
assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
                    ? 1'b1
                    : (effective_subtraction ? 1'b0 : tentative_sign);
|
||||
|
||||
// ------------------
// Internal pipeline
// ------------------
// Values at the end of the internal pipeline, as plain (non-array) signals
logic                          effective_subtraction_q;
logic signed [EXP_WIDTH-1:0]   exponent_product_q;
logic signed [EXP_WIDTH-1:0]   exponent_difference_q;
logic signed [EXP_WIDTH-1:0]   tentative_exponent_q;
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
logic                          sticky_before_add_q;
logic [3*PRECISION_BITS+3:0]   sum_q;
logic                          final_sign_q;
fpnew_pkg::fp_format_e         dst_fmt_q2;
fpnew_pkg::roundmode_e         rnd_mode_q;
logic                          result_is_special_q;
fp_t                           special_result_q;
fpnew_pkg::status_t            special_status_q;

// Per-stage pipeline signals: index i holds the signal after i register stages
logic                  [0:NUM_MID_REGS]                         mid_pipe_eff_sub_q;
logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_prod_q;
logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_exp_diff_q;
logic signed           [0:NUM_MID_REGS][EXP_WIDTH-1:0]          mid_pipe_tent_exp_q;
logic                  [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
logic                  [0:NUM_MID_REGS]                         mid_pipe_sticky_q;
logic                  [0:NUM_MID_REGS][3*PRECISION_BITS+3:0]   mid_pipe_sum_q;
logic                  [0:NUM_MID_REGS]                         mid_pipe_final_sign_q;
fpnew_pkg::roundmode_e [0:NUM_MID_REGS]                         mid_pipe_rnd_mode_q;
fpnew_pkg::fp_format_e [0:NUM_MID_REGS]                         mid_pipe_dst_fmt_q;
logic                  [0:NUM_MID_REGS]                         mid_pipe_res_is_spec_q;
fp_t                   [0:NUM_MID_REGS]                         mid_pipe_spec_res_q;
fpnew_pkg::status_t    [0:NUM_MID_REGS]                         mid_pipe_spec_stat_q;
TagType                [0:NUM_MID_REGS]                         mid_pipe_tag_q;
logic                  [0:NUM_MID_REGS]                         mid_pipe_mask_q;
AuxType                [0:NUM_MID_REGS]                         mid_pipe_aux_q;
logic                  [0:NUM_MID_REGS]                         mid_pipe_valid_q;
// Per-stage ready: combinational in every stage
logic                  [0:NUM_MID_REGS]                         mid_pipe_ready;

// Handshake with the input pipeline: its last slot feeds stage 0 here,
// and the ready of stage 0 is handed back to it
assign mid_pipe_valid_q[0]          = inp_pipe_valid_q[NUM_INP_REGS];
assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];

// Stage 0 payload: results of the logic above
assign mid_pipe_eff_sub_q[0]     = effective_subtraction;
assign mid_pipe_exp_prod_q[0]    = exponent_product;
assign mid_pipe_exp_diff_q[0]    = exponent_difference;
assign mid_pipe_tent_exp_q[0]    = tentative_exponent;
assign mid_pipe_add_shamt_q[0]   = addend_shamt;
assign mid_pipe_sticky_q[0]      = sticky_before_add;
assign mid_pipe_sum_q[0]         = sum;
assign mid_pipe_final_sign_q[0]  = final_sign;
assign mid_pipe_res_is_spec_q[0] = result_is_special;
assign mid_pipe_spec_res_q[0]    = special_result;
assign mid_pipe_spec_stat_q[0]   = special_status;
assign mid_pipe_dst_fmt_q[0]     = dst_fmt_q;

// Stage 0 payload: values passed through from the last input-pipeline slot
assign mid_pipe_rnd_mode_q[0]    = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_tag_q[0]         = inp_pipe_tag_q[NUM_INP_REGS];
assign mid_pipe_mask_q[0]        = inp_pipe_mask_q[NUM_INP_REGS];
assign mid_pipe_aux_q[0]         = inp_pipe_aux_q[NUM_INP_REGS];
|
||||
|
||||
// Internal pipeline: one register stage per iteration, moving slot [stage] into slot [stage+1]
for (genvar stage = 0; stage < NUM_MID_REGS; stage++) begin : gen_inside_pipeline
  // Load enable shared by all payload registers of this stage
  logic reg_ena;

  // This stage may accept data when the following slot is empty (holds a bubble)
  // or when the following slot is itself being drained.
  assign mid_pipe_ready[stage] = ~mid_pipe_valid_q[stage+1] | mid_pipe_ready[stage+1];

  // Payload registers only load when the pipeline advances AND a valid item is present
  assign reg_ena = mid_pipe_valid_q[stage] & mid_pipe_ready[stage];

  // Valid bit: loaded whenever the stage is ready, synchronously cleared by flush_i
  `FFLARNC(mid_pipe_valid_q[stage+1], mid_pipe_valid_q[stage], mid_pipe_ready[stage], flush_i, 1'b0, clk_i, rst_ni)

  // Payload registers (enable-registers)
  `FFL(mid_pipe_eff_sub_q[stage+1],     mid_pipe_eff_sub_q[stage],     reg_ena, '0)
  `FFL(mid_pipe_exp_prod_q[stage+1],    mid_pipe_exp_prod_q[stage],    reg_ena, '0)
  `FFL(mid_pipe_exp_diff_q[stage+1],    mid_pipe_exp_diff_q[stage],    reg_ena, '0)
  `FFL(mid_pipe_tent_exp_q[stage+1],    mid_pipe_tent_exp_q[stage],    reg_ena, '0)
  `FFL(mid_pipe_add_shamt_q[stage+1],   mid_pipe_add_shamt_q[stage],   reg_ena, '0)
  `FFL(mid_pipe_sticky_q[stage+1],      mid_pipe_sticky_q[stage],      reg_ena, '0)
  `FFL(mid_pipe_sum_q[stage+1],         mid_pipe_sum_q[stage],         reg_ena, '0)
  `FFL(mid_pipe_final_sign_q[stage+1],  mid_pipe_final_sign_q[stage],  reg_ena, '0)
  `FFL(mid_pipe_rnd_mode_q[stage+1],    mid_pipe_rnd_mode_q[stage],    reg_ena, fpnew_pkg::RNE)
  `FFL(mid_pipe_dst_fmt_q[stage+1],     mid_pipe_dst_fmt_q[stage],     reg_ena, fpnew_pkg::fp_format_e'(0))
  `FFL(mid_pipe_res_is_spec_q[stage+1], mid_pipe_res_is_spec_q[stage], reg_ena, '0)
  `FFL(mid_pipe_spec_res_q[stage+1],    mid_pipe_spec_res_q[stage],    reg_ena, '0)
  `FFL(mid_pipe_spec_stat_q[stage+1],   mid_pipe_spec_stat_q[stage],   reg_ena, '0)
  `FFL(mid_pipe_tag_q[stage+1],         mid_pipe_tag_q[stage],         reg_ena, TagType'('0))
  `FFL(mid_pipe_mask_q[stage+1],        mid_pipe_mask_q[stage],        reg_ena, '0)
  `FFL(mid_pipe_aux_q[stage+1],         mid_pipe_aux_q[stage],         reg_ena, AuxType'('0))
end
|
||||
// Tap the last internal-pipeline slot for the logic that follows
// Arithmetic results
assign sum_q                   = mid_pipe_sum_q[NUM_MID_REGS];
assign final_sign_q            = mid_pipe_final_sign_q[NUM_MID_REGS];
assign sticky_before_add_q     = mid_pipe_sticky_q[NUM_MID_REGS];
assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
// Exponent information
assign exponent_product_q      = mid_pipe_exp_prod_q[NUM_MID_REGS];
assign exponent_difference_q   = mid_pipe_exp_diff_q[NUM_MID_REGS];
assign tentative_exponent_q    = mid_pipe_tent_exp_q[NUM_MID_REGS];
assign addend_shamt_q          = mid_pipe_add_shamt_q[NUM_MID_REGS];
// Control
assign rnd_mode_q              = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign dst_fmt_q2              = mid_pipe_dst_fmt_q[NUM_MID_REGS];
// Special-case bypass
assign result_is_special_q     = mid_pipe_res_is_spec_q[NUM_MID_REGS];
assign special_result_q        = mid_pipe_spec_res_q[NUM_MID_REGS];
assign special_status_q        = mid_pipe_spec_stat_q[NUM_MID_REGS];
|
||||
|
||||
// --------------
// Normalization
// --------------
logic        [LOWER_SUM_WIDTH-1:0]  sum_lower;              // lower part of the sum, searched by the LZC
logic        [LZC_RESULT_WIDTH-1:0] leading_zero_count;     // number of leading zeroes found
logic signed [LZC_RESULT_WIDTH:0]   leading_zero_count_sgn; // same count in a signed container
logic                               lzc_zeroes;             // set when only zeroes were found

logic        [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt;           // normalization shift amount
logic signed [EXP_WIDTH-1:0]          normalized_exponent;

logic [3*PRECISION_BITS+4:0] sum_shifted;       // sum after the large normalization shift
logic [PRECISION_BITS:0]     final_mantissa;    // mantissa before rounding, including the round bit
logic [2*PRECISION_BITS+2:0] sum_sticky_bits;   // 2p+3 bits left below the mantissa
logic                        sticky_after_norm; // sticky bit after normalization

logic signed [EXP_WIDTH-1:0] final_exponent;

// Only the low LOWER_SUM_WIDTH bits of the sum are searched
assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];

// Leading-zero counter, needed when cancellation occurs
lzc #(
  .WIDTH ( LOWER_SUM_WIDTH ),
  .MODE  ( 1               ) // MODE = 1 counts leading zeroes
) i_lzc (
  .in_i    ( sum_lower          ),
  .cnt_o   ( leading_zero_count ),
  .empty_o ( lzc_zeroes         )
);

// Prepend a zero so the count can take part in signed arithmetic
assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
|
||||
|
||||
// Left-shift distance for normalization (unsigned: the sum is only shifted left),
// together with the exponent that matches the shifted sum
always_comb begin : norm_shift_amount
  if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
    // Product-anchored case, or a subtraction that may cancel: rely on the LZC
    if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
      // Normal result (biased exponent stays non-negative and the sum is not zero):
      // undo the initial product shift and remove the counted zeroes
      normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
      norm_shamt          = PRECISION_BITS + 2 + leading_zero_count;
    end else begin
      // Subnormal result: cap the shift so the mantissa lines up with the minimum exponent
      normalized_exponent = 0; // subnormals are encoded with exponent 0
      norm_shamt          = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
    end
  end else begin
    // Addend-anchored case: simply undo the initial addend shift
    normalized_exponent = tentative_exponent_q;
    norm_shamt          = addend_shamt_q;
  end
end
|
||||
|
||||
// Large normalization shift
assign sum_shifted = sum_q << norm_shamt;

// One-bit correction after the large shift: in the addend-anchored case the leading one
// can sit to the left or to the right of the (non-carry) MSB of the sum.
always_comb begin : small_norm
  // Default: take the shifted sum as it is; the carry bit is dropped by the assignment
  final_exponent                    = normalized_exponent;
  {final_mantissa, sum_sticky_bits} = sum_shifted;

  if (sum_shifted[3*PRECISION_BITS+4]) begin
    // Carry bit set: the sum overflowed, align right and bump the exponent
    {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
    final_exponent                    = normalized_exponent + 1;
  end else if (sum_shifted[3*PRECISION_BITS+3]) begin
    // MSB of the sum set: already normalized, keep the defaults
  end else if (normalized_exponent > 1) begin
    // Leading one is one position too low and there is exponent headroom: align left
    {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
    final_exponent                    = normalized_exponent - 1;
  end else begin
    // No headroom left: the result is denormal, encoded with exponent 0
    final_exponent = '0;
  end
end

// Fold the bits below the mantissa into the sticky bit collected before the addition
assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
|
||||
|
||||
// ----------------------------
// Rounding and classification
// ----------------------------
// Inputs of the rounding module
logic                                     pre_round_sign;
logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value before rounding
logic [1:0]                               round_sticky_bits;

logic of_before_round, of_after_round; // overflow
logic uf_before_round, uf_after_round; // underflow

// Per-format candidates, selected by the destination format further down
logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs;
logic [NUM_FORMATS-1:0][1:0]                               fmt_round_sticky_bits;

logic [NUM_FORMATS-1:0] fmt_of_after_round;
logic [NUM_FORMATS-1:0] fmt_uf_after_round;

// Outputs of the rounding module
logic                                     rounded_sign;
logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value after rounding
logic                                     result_zero;

// Classification before rounding. RISC-V mandates checking underflow AFTER rounding!
// Overflow: exponent reaches the all-ones (infinity) encoding of the destination format
assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1;
// Underflow candidate: exponent of subnormals is capped to 0
assign uf_before_round = final_exponent == 0;
|
||||
|
||||
// Build, for every format, the exponent/mantissa pair and the round/sticky bits for rounding
for (genvar fmt_idx = 0; fmt_idx < int'(NUM_FORMATS); fmt_idx++) begin : gen_res_assemble
  // Field sizes of the format handled by this iteration
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt_idx));

  logic [EXP_BITS-1:0] pre_round_exponent;
  logic [MAN_BITS-1:0] pre_round_mantissa;

  if (FpFmtConfig[fmt_idx]) begin : active_format

    // On overflow the largest normal value is used: exponent 2**EXP_BITS-2, mantissa all ones
    assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
    assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];

    // Exponent above mantissa; the assignment zero-extends to the super-format width
    assign fmt_pre_round_abs[fmt_idx] = {pre_round_exponent, pre_round_mantissa};

    // Round bit: first bit below this format's mantissa (forced to 1 on overflow)
    assign fmt_round_sticky_bits[fmt_idx][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
                                               of_before_round;

    // Sticky bit: everything below the round bit (forced to 1 on overflow)
    if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
      // Narrower format: leftover mantissa bits also count as sticky
      assign fmt_round_sticky_bits[fmt_idx][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
                                                 sticky_after_norm | of_before_round;
    end else begin : normal_sticky
      assign fmt_round_sticky_bits[fmt_idx][0] = sticky_after_norm | of_before_round;
    end
  end else begin : inactive_format
    // Format disabled in FpFmtConfig
    assign fmt_round_sticky_bits[fmt_idx] = '{default: fpnew_pkg::DONT_CARE};
    assign fmt_pre_round_abs[fmt_idx]     = '{default: fpnew_pkg::DONT_CARE};
  end
end
|
||||
|
||||
// Rounding inputs: sign from the pipeline, magnitude and round/sticky bits of the
// destination format (both already adjusted for overflow in the per-format logic)
assign pre_round_sign    = final_sign_q;
assign pre_round_abs     = fmt_pre_round_abs[dst_fmt_q2];
assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2];

// Rounding is done once, on the super-format-wide magnitude
fpnew_rounding #(
  .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
) i_fpnew_rounding (
  .abs_value_i             ( pre_round_abs           ),
  .sign_i                  ( pre_round_sign          ),
  .round_sticky_bits_i     ( round_sticky_bits       ),
  .rnd_mode_i              ( rnd_mode_q              ),
  .effective_subtraction_i ( effective_subtraction_q ),
  .abs_rounded_o           ( rounded_abs             ),
  .sign_o                  ( rounded_sign            ),
  .exact_zero_o            ( result_zero             )
);
|
||||
|
||||
// Rounded result, encoded once per format
logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;

for (genvar fmt_idx = 0; fmt_idx < int'(NUM_FORMATS); fmt_idx++) begin : gen_sign_inject
  // Field sizes of the format handled by this iteration
  localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt_idx));
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt_idx));

  if (FpFmtConfig[fmt_idx]) begin : active_format
    always_comb begin : post_process
      // Regular result: fill with ones first (NaN-box for short formats),
      // then place sign and rounded magnitude into the low FP_WIDTH bits
      fmt_result[fmt_idx]               = '1;
      fmt_result[fmt_idx][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};

      // Post-rounding classification from the exponent field:
      // all zeroes -> denormal, all ones -> infinity exponent
      fmt_uf_after_round[fmt_idx] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0;
      fmt_of_after_round[fmt_idx] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1;
    end
  end else begin : inactive_format
    // Format disabled in FpFmtConfig
    assign fmt_result[fmt_idx]         = '{default: fpnew_pkg::DONT_CARE};
    assign fmt_uf_after_round[fmt_idx] = fpnew_pkg::DONT_CARE;
    assign fmt_of_after_round[fmt_idx] = fpnew_pkg::DONT_CARE;
  end
end

// Post-rounding classification of the destination format
assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
assign of_after_round = fmt_of_after_round[dst_fmt_q2];
|
||||
|
||||
|
||||
// -----------------
// Result selection
// -----------------
logic [WIDTH-1:0]   regular_result;
fpnew_pkg::status_t regular_status;

// Result of the regular (non-special) datapath in the destination format
assign regular_result = fmt_result[dst_fmt_q2];

// Status flags of the regular datapath
assign regular_status.NV = 1'b0; // invalid cases are all handled by the special path
assign regular_status.DZ = 1'b0; // this unit performs no divisions
assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF

// Values handed to the output pipeline
logic [WIDTH-1:0]   result_d;
fpnew_pkg::status_t status_d;

// The special-case bypass overrides the regular datapath when flagged
assign status_d = result_is_special_q ? special_status_q : regular_status;
assign result_d = result_is_special_q ? special_result_q : regular_result;
|
||||
|
||||
// ----------------
|
||||
// Output Pipeline
|
||||
// ----------------
|
||||
// Output pipeline signals, index i holds signal after i register stages
|
||||
logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
|
||||
fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
|
||||
TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
|
||||
logic [0:NUM_OUT_REGS] out_pipe_mask_q;
|
||||
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
|
||||
logic [0:NUM_OUT_REGS] out_pipe_valid_q;
|
||||
// Ready signal is combinatorial for all stages
|
||||
logic [0:NUM_OUT_REGS] out_pipe_ready;
|
||||
|
||||
// Input stage: First element of pipeline is taken from inputs
|
||||
assign out_pipe_result_q[0] = result_d;
|
||||
assign out_pipe_status_q[0] = status_d;
|
||||
assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
|
||||
assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
|
||||
assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
|
||||
assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
|
||||
// Input stage: Propagate pipeline ready signal to inside pipe
|
||||
assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
|
||||
// Generate the register stages
|
||||
for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
|
||||
// Internal register enable for this stage
|
||||
logic reg_ena;
|
||||
// Determine the ready signal of the current stage - advance the pipeline:
|
||||
// 1. if the next stage is ready for our data
|
||||
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
|
||||
assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
|
||||
// Valid: enabled by ready signal, synchronous clear with the flush signal
|
||||
`FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
|
||||
// Enable register if pipeline ready and a valid data item is present
|
||||
assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
|
||||
// Generate the pipeline registers within the stages, use enable-registers
|
||||
`FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
|
||||
`FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
|
||||
`FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
|
||||
end
|
||||
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
|
||||
assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
|
||||
// Output stage: assign module outputs
|
||||
assign result_o = out_pipe_result_q[NUM_OUT_REGS];
|
||||
assign status_o = out_pipe_status_q[NUM_OUT_REGS];
|
||||
assign extension_bit_o = 1'b1; // always NaN-Box result
|
||||
assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
|
||||
assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
|
||||
assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
|
||||
assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
|
||||
assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
|
||||
endmodule
|
415
vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
vendored
415
vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
vendored
|
@ -1,415 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
// Non-computational floating-point unit for a single FP format.
//
// Handles the SGNJ, MINMAX, CMP and CLASSIFY operations on two operands. The
// concrete sub-operation of each group is selected through the rounding-mode
// input, and `op_mod_i` modifies the result as described at each datapath below.
// Pipeline registers can be placed in front of and/or behind the combinational
// datapath depending on `PipeConfig` / `NumPipeRegs`.
module fpnew_noncomp #(
  parameter fpnew_pkg::fp_format_e   FpFormat    = fpnew_pkg::fp_format_e'(0),
  parameter int unsigned             NumPipeRegs = 0,
  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
  parameter type                     TagType     = logic,
  parameter type                     AuxType     = logic,

  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
) (
  input  logic                  clk_i,
  input  logic                  rst_ni,
  // Input signals
  input  logic [1:0][WIDTH-1:0] operands_i, // 2 operands
  input  logic [1:0]            is_boxed_i, // 2 operands
  input  fpnew_pkg::roundmode_e rnd_mode_i,
  input  fpnew_pkg::operation_e op_i,
  input  logic                  op_mod_i,
  input  TagType                tag_i,
  input  logic                  mask_i,
  input  AuxType                aux_i,
  // Input Handshake
  input  logic                  in_valid_i,
  output logic                  in_ready_o,
  input  logic                  flush_i,
  // Output signals
  output logic [WIDTH-1:0]      result_o,
  output fpnew_pkg::status_t    status_o,
  output logic                  extension_bit_o,
  output fpnew_pkg::classmask_e class_mask_o,
  output logic                  is_class_o,
  output TagType                tag_o,
  output logic                  mask_o,
  output AuxType                aux_o,
  // Output handshake
  output logic                  out_valid_o,
  input  logic                  out_ready_i,
  // Indication of valid data in flight
  output logic                  busy_o
);

  // ----------
  // Constants
  // ----------
  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);

  // Number of register stages in front of the datapath:
  //   BEFORE / INSIDE -> all of them, DISTRIBUTED -> the larger half, else none.
  localparam int unsigned NUM_INP_REGS =
      (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
      ? NumPipeRegs
      : (PipeConfig == fpnew_pkg::DISTRIBUTED
         ? ((NumPipeRegs + 1) / 2)
         : 0);
  // Number of register stages behind the datapath:
  //   AFTER -> all of them, DISTRIBUTED -> the smaller half, else none.
  localparam int unsigned NUM_OUT_REGS =
      PipeConfig == fpnew_pkg::AFTER
      ? NumPipeRegs
      : (PipeConfig == fpnew_pkg::DISTRIBUTED
         ? (NumPipeRegs / 2)
         : 0);

  // ----------------
  // Type definition
  // ----------------
  // Field view of one operand / result in the selected format.
  typedef struct packed {
    logic                sign;
    logic [EXP_BITS-1:0] exponent;
    logic [MAN_BITS-1:0] mantissa;
  } fp_t;

  // Positive quiet NaN with only the mantissa MSB set.
  localparam fp_t CANONICAL_QNAN = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};

  // ---------------
  // Input pipeline
  // ---------------
  // Index i of every inp_pipe_* array is the signal after i register stages;
  // index 0 is the unregistered module input.
  logic                  [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
  logic                  [0:NUM_INP_REGS][1:0]            inp_pipe_is_boxed_q;
  fpnew_pkg::roundmode_e [0:NUM_INP_REGS]                 inp_pipe_rnd_mode_q;
  fpnew_pkg::operation_e [0:NUM_INP_REGS]                 inp_pipe_op_q;
  logic                  [0:NUM_INP_REGS]                 inp_pipe_op_mod_q;
  TagType                [0:NUM_INP_REGS]                 inp_pipe_tag_q;
  logic                  [0:NUM_INP_REGS]                 inp_pipe_mask_q;
  AuxType                [0:NUM_INP_REGS]                 inp_pipe_aux_q;
  logic                  [0:NUM_INP_REGS]                 inp_pipe_valid_q;
  // Ready is purely combinational and travels from the back to the front
  logic                  [0:NUM_INP_REGS]                 inp_pipe_ready;

  // Stage 0 is fed straight from the ports
  assign inp_pipe_operands_q[0] = operands_i;
  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
  assign inp_pipe_op_q[0]       = op_i;
  assign inp_pipe_op_mod_q[0]   = op_mod_i;
  assign inp_pipe_tag_q[0]      = tag_i;
  assign inp_pipe_mask_q[0]     = mask_i;
  assign inp_pipe_aux_q[0]      = aux_i;
  assign inp_pipe_valid_q[0]    = in_valid_i;
  // Ready of the first stage is what the upstream circuitry sees
  assign in_ready_o = inp_pipe_ready[0];

  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
    // Load enable of the data registers of this stage
    logic reg_ena;
    // A stage may advance when the following stage accepts data or currently
    // holds a bubble (its valid bit is clear).
    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
    // Valid bit: loaded while the stage is ready, cleared by flush_i
    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
    // Data is only captured for valid items that are allowed to advance
    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
    // Payload registers of this stage
    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
    `FFL(inp_pipe_op_q[i+1],       inp_pipe_op_q[i],       reg_ena, fpnew_pkg::FMADD)
    `FFL(inp_pipe_op_mod_q[i+1],   inp_pipe_op_mod_q[i],   reg_ena, '0)
    `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
    `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
  end

  // Control signals as seen by the datapath (output of the input pipeline)
  fpnew_pkg::roundmode_e rnd_mode_q;
  fpnew_pkg::operation_e op_q;
  logic                  op_mod_q;

  assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS];
  assign op_q       = inp_pipe_op_q[NUM_INP_REGS];
  assign op_mod_q   = inp_pipe_op_mod_q[NUM_INP_REGS];

  // ---------------------
  // Input classification
  // ---------------------
  fpnew_pkg::fp_info_t [1:0] info_q;

  // One classifier instance covers both operands
  fpnew_classifier #(
    .FpFormat    ( FpFormat ),
    .NumOperands ( 2        )
  ) i_class_a (
    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
    .info_o     ( info_q                            )
  );

  fp_t                 operand_a, operand_b;
  fpnew_pkg::fp_info_t info_a,    info_b;

  // Per-operand views, independent of the packing order of the arrays
  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
  assign info_a    = info_q[0];
  assign info_b    = info_q[1];

  logic any_operand_inf;
  logic any_operand_nan;
  logic signalling_nan;

  // "Either operand is ..." flags for the special-case handling
  assign any_operand_inf = info_a.is_inf        | info_b.is_inf;
  assign any_operand_nan = info_a.is_nan        | info_b.is_nan;
  assign signalling_nan  = info_a.is_signalling | info_b.is_signalling;

  logic operands_equal, operand_a_smaller;

  // Equal when the encodings match, or when both operands are zero
  assign operands_equal    = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
  // The raw compare is unsigned on the encodings; its outcome is flipped
  // whenever at least one of the sign bits is set.
  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);

  // ---------------
  // Sign Injection
  // ---------------
  fp_t                sgnj_result;
  fpnew_pkg::status_t sgnj_status;
  logic               sgnj_extension_bit;

  // The sub-operation is taken from the rounding mode:
  //   RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = passthrough (no NaN-box check)
  always_comb begin : sign_injections
    logic a_sign, b_sign; // signs after the NaN-boxing check

    // Start from operand a ...
    sgnj_result = operand_a;
    // ... unless it is not properly NaN-boxed, then use the canonical NaN
    if (!info_a.is_boxed) sgnj_result = CANONICAL_QNAN;

    // Signs of operands that are not NaN-boxed count as positive
    a_sign = operand_a.sign & info_a.is_boxed;
    b_sign = operand_b.sign & info_b.is_boxed;

    unique case (rnd_mode_q)
      fpnew_pkg::RNE: sgnj_result.sign = b_sign;          // SGNJ
      fpnew_pkg::RTZ: sgnj_result.sign = ~b_sign;         // SGNJN
      fpnew_pkg::RDN: sgnj_result.sign = a_sign ^ b_sign; // SGNJX
      fpnew_pkg::RUP: sgnj_result      = operand_a;       // passthrough
      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
    endcase
  end

  // No exception flags are produced by sign injection
  assign sgnj_status = '0;

  // With op_mod_q set the extension bit follows the result sign (sign-extension
  // for the integer regfile); otherwise it is 1.
  assign sgnj_extension_bit = op_mod_q ? sgnj_result.sign : 1'b1;

  // ------------------
  // Minimum / Maximum
  // ------------------
  fp_t                minmax_result;
  fpnew_pkg::status_t minmax_status;
  logic               minmax_extension_bit;

  // The sub-operation is taken from the rounding mode: RNE = MIN, RTZ = MAX
  always_comb begin : min_max
    // Quiet comparison: only signalling NaNs raise NV, all other flags stay 0
    minmax_status = '{NV: signalling_nan, default: 1'b0};

    if (info_a.is_nan && info_b.is_nan) begin
      // Two NaN inputs give the canonical qNaN
      minmax_result = CANONICAL_QNAN;
    end else if (info_a.is_nan) begin
      // Exactly one NaN: the other operand wins
      minmax_result = operand_b;
    end else if (info_b.is_nan) begin
      minmax_result = operand_a;
    end else begin
      // Regular case: pick by magnitude comparison
      unique case (rnd_mode_q)
        fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN
        fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? operand_b : operand_a; // MAX
        default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
      endcase
    end
  end

  // The result is a float value, so it is always NaN-boxed
  assign minmax_extension_bit = 1'b1;

  // ------------
  // Comparisons
  // ------------
  fp_t                cmp_result;
  fpnew_pkg::status_t cmp_status;
  logic               cmp_extension_bit;

  // The sub-operation is taken from the rounding mode:
  //   RNE = LE, RTZ = LT, RDN = EQ
  // op_mod_q inverts the boolean outcome.
  always_comb begin : comparisons
    cmp_result = '0; // false unless set below
    cmp_status = '0; // no flags unless set below

    if (signalling_nan) begin
      // Any signalling NaN: invalid operation, result stays false
      cmp_status.NV = 1'b1;
    end else begin
      unique case (rnd_mode_q)
        fpnew_pkg::RNE: begin // a <= b
          // Signalling comparison: every NaN operand is invalid
          if (any_operand_nan) cmp_status.NV = 1'b1;
          else cmp_result = (operand_a_smaller | operands_equal) ^ op_mod_q;
        end
        fpnew_pkg::RTZ: begin // a < b
          // Signalling comparison: every NaN operand is invalid
          if (any_operand_nan) cmp_status.NV = 1'b1;
          else cmp_result = (operand_a_smaller & ~operands_equal) ^ op_mod_q;
        end
        fpnew_pkg::RDN: begin // a == b
          // A NaN never compares equal: plain result false, inverted result true
          if (any_operand_nan) cmp_result = op_mod_q;
          else cmp_result = operands_equal ^ op_mod_q;
        end
        default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
      endcase
    end
  end

  // Booleans go to the integer regfile: extend with zeroes
  assign cmp_extension_bit = 1'b0;

  // ---------------
  // Classification
  // ---------------
  fpnew_pkg::status_t    class_status;
  logic                  class_extension_bit;
  fpnew_pkg::classmask_e class_mask_d; // carries the classification result

  // The class mask of operand a is always produced on its dedicated port
  always_comb begin : classify
    if (info_a.is_normal) begin
      class_mask_d = operand_a.sign ? fpnew_pkg::NEGNORM    : fpnew_pkg::POSNORM;
    end else if (info_a.is_subnormal) begin
      class_mask_d = operand_a.sign ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM;
    end else if (info_a.is_zero) begin
      class_mask_d = operand_a.sign ? fpnew_pkg::NEGZERO    : fpnew_pkg::POSZERO;
    end else if (info_a.is_inf) begin
      class_mask_d = operand_a.sign ? fpnew_pkg::NEGINF     : fpnew_pkg::POSINF;
    end else if (info_a.is_nan) begin
      class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN : fpnew_pkg::QNAN;
    end else begin
      class_mask_d = fpnew_pkg::QNAN; // fallback when no class flag is set
    end
  end

  assign class_status        = '0;   // classification raises no flags
  assign class_extension_bit = 1'b0; // result goes to the integer regfile

  // -----------------
  // Result selection
  // -----------------
  fp_t                result_d;
  fpnew_pkg::status_t status_d;
  logic               extension_bit_d;
  logic               is_class_d;

  // Forward the datapath that belongs to the requested operation
  always_comb begin : select_result
    unique case (op_q)
      fpnew_pkg::SGNJ: begin
        result_d        = sgnj_result;
        status_d        = sgnj_status;
        extension_bit_d = sgnj_extension_bit;
      end
      fpnew_pkg::MINMAX: begin
        result_d        = minmax_result;
        status_d        = minmax_status;
        extension_bit_d = minmax_extension_bit;
      end
      fpnew_pkg::CMP: begin
        result_d        = cmp_result;
        status_d        = cmp_status;
        extension_bit_d = cmp_extension_bit;
      end
      fpnew_pkg::CLASSIFY: begin
        result_d        = '{default: fpnew_pkg::DONT_CARE}; // unused, see class_mask_d
        status_d        = class_status;
        extension_bit_d = class_extension_bit;
      end
      default: begin
        result_d        = '{default: fpnew_pkg::DONT_CARE}; // don't care
        status_d        = '{default: fpnew_pkg::DONT_CARE}; // don't care
        extension_bit_d = fpnew_pkg::DONT_CARE;             // don't care
      end
    endcase
  end

  assign is_class_d = (op_q == fpnew_pkg::CLASSIFY);

  // ----------------
  // Output Pipeline
  // ----------------
  // Index i of every out_pipe_* array is the signal after i register stages;
  // index 0 is the unregistered datapath output.
  fp_t                   [0:NUM_OUT_REGS] out_pipe_result_q;
  fpnew_pkg::status_t    [0:NUM_OUT_REGS] out_pipe_status_q;
  logic                  [0:NUM_OUT_REGS] out_pipe_extension_bit_q;
  fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q;
  logic                  [0:NUM_OUT_REGS] out_pipe_is_class_q;
  TagType                [0:NUM_OUT_REGS] out_pipe_tag_q;
  logic                  [0:NUM_OUT_REGS] out_pipe_mask_q;
  AuxType                [0:NUM_OUT_REGS] out_pipe_aux_q;
  logic                  [0:NUM_OUT_REGS] out_pipe_valid_q;
  // Ready is purely combinational and travels from the back to the front
  logic                  [0:NUM_OUT_REGS] out_pipe_ready;

  // Stage 0 is fed from the datapath and the end of the input pipeline
  assign out_pipe_result_q[0]        = result_d;
  assign out_pipe_status_q[0]        = status_d;
  assign out_pipe_extension_bit_q[0] = extension_bit_d;
  assign out_pipe_class_mask_q[0]    = class_mask_d;
  assign out_pipe_is_class_q[0]      = is_class_d;
  assign out_pipe_tag_q[0]           = inp_pipe_tag_q[NUM_INP_REGS];
  assign out_pipe_mask_q[0]          = inp_pipe_mask_q[NUM_INP_REGS];
  assign out_pipe_aux_q[0]           = inp_pipe_aux_q[NUM_INP_REGS];
  assign out_pipe_valid_q[0]         = inp_pipe_valid_q[NUM_INP_REGS];
  // Back-pressure from the output pipeline reaches the input pipeline here
  assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0];

  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
    // Load enable of the data registers of this stage
    logic reg_ena;
    // A stage may advance when the following stage accepts data or currently
    // holds a bubble (its valid bit is clear).
    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
    // Valid bit: loaded while the stage is ready, cleared by flush_i
    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
    // Data is only captured for valid items that are allowed to advance
    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
    // Payload registers of this stage
    `FFL(out_pipe_result_q[i+1],        out_pipe_result_q[i],        reg_ena, '0)
    `FFL(out_pipe_status_q[i+1],        out_pipe_status_q[i],        reg_ena, '0)
    `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0)
    `FFL(out_pipe_class_mask_q[i+1],    out_pipe_class_mask_q[i],    reg_ena, fpnew_pkg::QNAN)
    `FFL(out_pipe_is_class_q[i+1],      out_pipe_is_class_q[i],      reg_ena, '0)
    `FFL(out_pipe_tag_q[i+1],           out_pipe_tag_q[i],           reg_ena, TagType'('0))
    `FFL(out_pipe_mask_q[i+1],          out_pipe_mask_q[i],          reg_ena, '0)
    `FFL(out_pipe_aux_q[i+1],           out_pipe_aux_q[i],           reg_ena, AuxType'('0))
  end

  // Ready of the last stage comes from the downstream circuitry
  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
  // The last stage drives the module outputs
  assign result_o        = out_pipe_result_q[NUM_OUT_REGS];
  assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
  assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS];
  assign class_mask_o    = out_pipe_class_mask_q[NUM_OUT_REGS];
  assign is_class_o      = out_pipe_is_class_q[NUM_OUT_REGS];
  assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
  assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
  assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
  // Busy while any stage of either pipeline holds a valid item
  assign busy_o          = (| {inp_pipe_valid_q, out_pipe_valid_q});
endmodule
|
244
vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
vendored
244
vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv
vendored
|
@ -1,244 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
// Operation-group block: bundles all format slices of one operation group.
//
// For every FP format one of the following is generated:
//   - a dedicated `fpnew_opgroup_fmt_slice` (unit type PARALLEL),
//   - a share of the single `fpnew_opgroup_multifmt_slice` (unit type MERGED),
//   - or a tie-off (format masked out in FpFmtMask, or unit type DISABLED).
// Incoming operations are steered by `dst_fmt_i`; slice outputs are merged
// onto the single output port by a round-robin arbiter.
module fpnew_opgroup_block #(
  parameter fpnew_pkg::opgroup_e        OpGroup       = fpnew_pkg::ADDMUL,
  // FPU configuration
  parameter int unsigned                Width         = 32,
  parameter logic                       EnableVectors = 1'b1,
  parameter fpnew_pkg::fmt_logic_t      FpFmtMask     = '1,
  parameter fpnew_pkg::ifmt_logic_t     IntFmtMask    = '1,
  parameter fpnew_pkg::fmt_unsigned_t   FmtPipeRegs   = '{default: 0},
  parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes  = '{default: fpnew_pkg::PARALLEL},
  parameter fpnew_pkg::pipe_config_t    PipeConfig    = fpnew_pkg::BEFORE,
  parameter type                        TagType       = logic,
  parameter int unsigned                TrueSIMDClass = 0,
  // Do not change
  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS,
  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
  localparam int unsigned NUM_LANES    = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors),
  localparam type         MaskType     = logic [NUM_LANES-1:0]
) (
  input  logic                                     clk_i,
  input  logic                                     rst_ni,
  // Input signals
  input  logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
  input  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
  input  fpnew_pkg::roundmode_e                    rnd_mode_i,
  input  fpnew_pkg::operation_e                    op_i,
  input  logic                                     op_mod_i,
  input  fpnew_pkg::fp_format_e                    src_fmt_i,
  input  fpnew_pkg::fp_format_e                    dst_fmt_i,
  input  fpnew_pkg::int_format_e                   int_fmt_i,
  input  logic                                     vectorial_op_i,
  input  TagType                                   tag_i,
  input  MaskType                                  simd_mask_i,
  // Input Handshake
  input  logic                                     in_valid_i,
  output logic                                     in_ready_o,
  input  logic                                     flush_i,
  // Output signals
  output logic [Width-1:0]                         result_o,
  output fpnew_pkg::status_t                       status_o,
  output logic                                     extension_bit_o,
  output TagType                                   tag_o,
  // Output handshake
  output logic                                     out_valid_o,
  input  logic                                     out_ready_i,
  // Indication of valid data in flight
  output logic                                     busy_o
);

  // ----------------
  // Type Definition
  // ----------------
  // Everything a slice hands to the output arbiter, bundled as one payload
  typedef struct packed {
    logic [Width-1:0]   result;
    fpnew_pkg::status_t status;
    logic               ext_bit;
    TagType             tag;
  } output_t;

  // Handshake signals for the slices, one entry per format
  logic    [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy;
  output_t [NUM_FORMATS-1:0] fmt_outputs;

  // -----------
  // Input Side
  // -----------
  // Ready is taken from the format addressed by dst_fmt_i and is only
  // asserted while a request is being presented.
  assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i];

  // -------------------------
  // Generate Parallel Slices
  // -------------------------
  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices
    // Some constants for this format
    localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask);
    localparam logic IS_FIRST_MERGED =
        fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask);

    // Generate slice only if format enabled
    if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format

      // Number of SIMD lanes of this format's slice
      localparam int unsigned INTERNAL_LANES =
          fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors);

      logic                      in_valid;
      logic [INTERNAL_LANES-1:0] mask_slice;

      // Only the slice of the requested destination format sees the request
      assign in_valid = in_valid_i & (dst_fmt_i == fmt);

      // Pick every (NUM_LANES/INTERNAL_LANES)-th bit of the block-level mask
      // as the mask bit of the corresponding slice lane.
      always_comb begin
        for (int unsigned b = 0; b < INTERNAL_LANES; b++) begin
          mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b];
        end
      end

      fpnew_opgroup_fmt_slice #(
        .OpGroup       ( OpGroup                      ),
        .FpFormat      ( fpnew_pkg::fp_format_e'(fmt) ),
        .Width         ( Width                        ),
        .EnableVectors ( EnableVectors                ),
        .NumPipeRegs   ( FmtPipeRegs[fmt]             ),
        .PipeConfig    ( PipeConfig                   ),
        .TagType       ( TagType                      ),
        .TrueSIMDClass ( TrueSIMDClass                )
      ) i_fmt_slice (
        .clk_i,
        .rst_ni,
        .operands_i      ( operands_i               ),
        .is_boxed_i      ( is_boxed_i[fmt]          ),
        .rnd_mode_i,
        .op_i,
        .op_mod_i,
        .vectorial_op_i,
        .tag_i,
        .simd_mask_i     ( mask_slice               ),
        .in_valid_i      ( in_valid                 ),
        .in_ready_o      ( fmt_in_ready[fmt]        ),
        .flush_i,
        .result_o        ( fmt_outputs[fmt].result  ),
        .status_o        ( fmt_outputs[fmt].status  ),
        .extension_bit_o ( fmt_outputs[fmt].ext_bit ),
        .tag_o           ( fmt_outputs[fmt].tag     ),
        .out_valid_o     ( fmt_out_valid[fmt]       ),
        .out_ready_i     ( fmt_out_ready[fmt]       ),
        .busy_o          ( fmt_busy[fmt]            )
      );

    // Merged formats other than the first one: the merged slice is hooked up
    // at the first merged format's index, so tie off this format's outputs and
    // mirror the merged slice's ready.
    // The explicit MERGED check keeps DISABLED formats out of this branch.
    // Without it a DISABLED format would report the merged slice's ready while
    // the merged slice's in_valid (gated on the unit type being MERGED) stays
    // low, i.e. the request would be handshaken but never enter any slice.
    end else if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::MERGED)
                 && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused

      localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
      // Ready is split up into formats
      assign fmt_in_ready[fmt]  = fmt_in_ready[int'(FMT)];

      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
      assign fmt_busy[fmt]      = 1'b0; // never busy
      // Outputs are don't care
      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);

    // Tie off disabled formats
    end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt
      assign fmt_in_ready[fmt]  = 1'b0; // don't accept operations
      assign fmt_out_valid[fmt] = 1'b0; // don't emit values
      assign fmt_busy[fmt]      = 1'b0; // never busy
      // Outputs are don't care
      assign fmt_outputs[fmt].result  = '{default: fpnew_pkg::DONT_CARE};
      assign fmt_outputs[fmt].status  = '{default: fpnew_pkg::DONT_CARE};
      assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE;
      assign fmt_outputs[fmt].tag     = TagType'(fpnew_pkg::DONT_CARE);
    end
    // Remaining case (enabled, MERGED, first merged format): the signals of
    // this index are driven by the merged slice generated below.
  end

  // ----------------------
  // Generate Merged Slice
  // ----------------------
  if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice

    // Index at which the merged slice connects to the per-format signals
    localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask);
    // Pipeline depth handed to the merged slice
    localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask);

    logic in_valid;

    // The merged slice takes every request whose destination format is MERGED
    assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED);

    fpnew_opgroup_multifmt_slice #(
      .OpGroup       ( OpGroup       ),
      .Width         ( Width         ),
      .FpFmtConfig   ( FpFmtMask     ),
      .IntFmtConfig  ( IntFmtMask    ),
      .EnableVectors ( EnableVectors ),
      .NumPipeRegs   ( REG           ),
      .PipeConfig    ( PipeConfig    ),
      .TagType       ( TagType       )
    ) i_multifmt_slice (
      .clk_i,
      .rst_ni,
      .operands_i,
      .is_boxed_i,
      .rnd_mode_i,
      .op_i,
      .op_mod_i,
      .src_fmt_i,
      .dst_fmt_i,
      .int_fmt_i,
      .vectorial_op_i,
      .tag_i,
      .simd_mask_i     ( simd_mask_i              ),
      .in_valid_i      ( in_valid                 ),
      .in_ready_o      ( fmt_in_ready[FMT]        ),
      .flush_i,
      .result_o        ( fmt_outputs[FMT].result  ),
      .status_o        ( fmt_outputs[FMT].status  ),
      .extension_bit_o ( fmt_outputs[FMT].ext_bit ),
      .tag_o           ( fmt_outputs[FMT].tag     ),
      .out_valid_o     ( fmt_out_valid[FMT]       ),
      .out_ready_i     ( fmt_out_ready[FMT]       ),
      .busy_o          ( fmt_busy[FMT]            )
    );

  end

  // ------------------
  // Arbitrate Outputs
  // ------------------
  output_t arbiter_output;

  // Round-Robin arbiter to decide which result to use
  rr_arb_tree #(
    .NumIn     ( NUM_FORMATS ),
    .DataType  ( output_t    ),
    .AxiVldRdy ( 1'b1        )
  ) i_arbiter (
    .clk_i,
    .rst_ni,
    .flush_i,
    .rr_i   ( '0             ),
    .req_i  ( fmt_out_valid  ),
    .gnt_o  ( fmt_out_ready  ),
    .data_i ( fmt_outputs    ),
    .gnt_i  ( out_ready_i    ),
    .req_o  ( out_valid_o    ),
    .data_o ( arbiter_output ),
    .idx_o  ( /* unused */   )
  );

  // Unpack output
  assign result_o        = arbiter_output.result;
  assign status_o        = arbiter_output.status;
  assign extension_bit_o = arbiter_output.ext_bit;
  assign tag_o           = arbiter_output.tag;

  // Busy as soon as any slice reports busy
  assign busy_o = (| fmt_busy);

endmodule
|
|
@ -1,292 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
// Single-format operation-group slice.
//
// Generates NUM_LANES lanes for the format `FpFormat`. Each active lane slices FP_WIDTH bits out
// of every operand, feeds them to the functional unit selected by `OpGroup` (fpnew_fma for ADDMUL,
// fpnew_noncomp for NONCOMP) and the lane results are concatenated into the Width-wide result.
// Handshake, tag and extension bit are taken from lane 0; the status flags of all lanes are OR-ed
// together, gated by the per-lane mask returned by the units.
//
// NOTE: the DIVSQRT branch below is commented out, so no unit is instantiated for
// OpGroup == DIVSQRT and the lane signals stay undriven in that configuration.
module fpnew_opgroup_fmt_slice #(
  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::ADDMUL,
  parameter fpnew_pkg::fp_format_e   FpFormat      = fpnew_pkg::fp_format_e'(0),
  // FPU configuration
  parameter int unsigned             Width         = 32,
  parameter logic                    EnableVectors = 1'b1,
  parameter int unsigned             NumPipeRegs   = 0,
  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
  parameter type                     TagType       = logic,
  // Non-zero selects 10-bit-per-lane class results (only if a lane is at least 10 bits wide),
  // otherwise vectorial class results use the 8-bit-per-lane encoding below
  parameter int unsigned             TrueSIMDClass = 0,
  // Do not change
  localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
  localparam int unsigned NUM_LANES    = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors),
  localparam type         MaskType     = logic [NUM_LANES-1:0]
) (
  input logic                               clk_i,
  input logic                               rst_ni,
  // Input signals
  input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
  input logic [NUM_OPERANDS-1:0]            is_boxed_i,
  input fpnew_pkg::roundmode_e              rnd_mode_i,
  input fpnew_pkg::operation_e              op_i,
  input logic                               op_mod_i,
  input logic                               vectorial_op_i,
  input TagType                             tag_i,
  input MaskType                            simd_mask_i,
  // Input Handshake
  input  logic                              in_valid_i,
  output logic                              in_ready_o,
  input  logic                              flush_i,
  // Output signals
  output logic [Width-1:0]                  result_o,
  output fpnew_pkg::status_t                status_o,
  output logic                              extension_bit_o,
  output TagType                            tag_o,
  // Output handshake
  output logic                              out_valid_o,
  input  logic                              out_ready_i,
  // Indication of valid data in flight
  output logic                              busy_o
);

  localparam int unsigned FP_WIDTH   = fpnew_pkg::fp_width(FpFormat);
  localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES);


  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
  logic                 vectorial_op;

  logic [NUM_LANES*FP_WIDTH-1:0] slice_result;
  logic [Width-1:0]              slice_regular_result, slice_class_result, slice_vec_class_result;

  fpnew_pkg::status_t    [NUM_LANES-1:0] lane_status;
  logic                  [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used
  fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask;
  TagType                [NUM_LANES-1:0] lane_tags; // only the first one is actually used
  logic                  [NUM_LANES-1:0] lane_masks;
  logic                  [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto

  logic result_is_vector, result_is_class;

  // -----------
  // Input Side
  // -----------
  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled

  // ---------------
  // Generate Lanes
  // ---------------
  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
    logic [FP_WIDTH-1:0] local_result; // lane-local results
    logic                local_sign;

    // Generate instances only if needed, lane 0 always generated
    if ((lane == 0) || EnableVectors) begin : active_lane
      logic in_valid, out_valid, out_ready; // lane-local handshake

      logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands
      logic [FP_WIDTH-1:0]                   op_result;      // lane-local results
      fpnew_pkg::status_t                    op_status;

      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
      // Slice out the operands for this lane
      always_comb begin : prepare_input
        for (int i = 0; i < int'(NUM_OPERANDS); i++) begin
          local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH];
        end
      end

      // Instantiate the operation from the selected opgroup
      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
        fpnew_fma #(
          .FpFormat    ( FpFormat    ),
          .NumPipeRegs ( NumPipeRegs ),
          .PipeConfig  ( PipeConfig  ),
          .TagType     ( TagType     ),
          .AuxType     ( logic       )
        ) i_fma (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands               ),
          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]    ),
          .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
          .in_valid_i      ( in_valid             ),
          .in_ready_o      ( lane_in_ready[lane]  ),
          .flush_i,
          .result_o        ( op_result            ),
          .status_o        ( op_status            ),
          .extension_bit_o ( lane_ext_bit[lane]   ),
          .tag_o           ( lane_tags[lane]      ),
          .mask_o          ( lane_masks[lane]     ),
          .aux_o           ( lane_vectorial[lane] ),
          .out_valid_o     ( out_valid            ),
          .out_ready_i     ( out_ready            ),
          .busy_o          ( lane_busy[lane]      )
        );
        // The FMA never produces a classification result; tie off the class signals
        assign lane_is_class[lane]   = 1'b0;
        assign lane_class_mask[lane] = fpnew_pkg::NEGINF;
      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
        // NOTE: no single-format DIVSQRT unit is instantiated here (kept commented out below)
        // fpnew_divsqrt #(
        //   .FpFormat   (FpFormat),
        //   .NumPipeRegs(NumPipeRegs),
        //   .PipeConfig (PipeConfig),
        //   .TagType    (TagType),
        //   .AuxType    (logic)
        // ) i_divsqrt (
        //   .clk_i,
        //   .rst_ni,
        //   .operands_i      ( local_operands               ),
        //   .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
        //   .rnd_mode_i,
        //   .op_i,
        //   .op_mod_i,
        //   .tag_i,
        //   .aux_i           ( vectorial_op         ), // Remember whether operation was vectorial
        //   .in_valid_i      ( in_valid             ),
        //   .in_ready_o      ( lane_in_ready[lane]  ),
        //   .flush_i,
        //   .result_o        ( op_result            ),
        //   .status_o        ( op_status            ),
        //   .extension_bit_o ( lane_ext_bit[lane]   ),
        //   .tag_o           ( lane_tags[lane]      ),
        //   .aux_o           ( lane_vectorial[lane] ),
        //   .out_valid_o     ( out_valid            ),
        //   .out_ready_i     ( out_ready            ),
        //   .busy_o          ( lane_busy[lane]      )
        // );
        // assign lane_is_class[lane] = 1'b0;
      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
        fpnew_noncomp #(
          .FpFormat   (FpFormat),
          .NumPipeRegs(NumPipeRegs),
          .PipeConfig (PipeConfig),
          .TagType    (TagType),
          .AuxType    (logic)
        ) i_noncomp (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands               ),
          .is_boxed_i      ( is_boxed_i[NUM_OPERANDS-1:0] ),
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]     ),
          .aux_i           ( vectorial_op          ), // Remember whether operation was vectorial
          .in_valid_i      ( in_valid              ),
          .in_ready_o      ( lane_in_ready[lane]   ),
          .flush_i,
          .result_o        ( op_result             ),
          .status_o        ( op_status             ),
          .extension_bit_o ( lane_ext_bit[lane]    ),
          .class_mask_o    ( lane_class_mask[lane] ),
          .is_class_o      ( lane_is_class[lane]   ),
          .tag_o           ( lane_tags[lane]       ),
          .mask_o          ( lane_masks[lane]      ),
          .aux_o           ( lane_vectorial[lane]  ),
          .out_valid_o     ( out_valid             ),
          .out_ready_i     ( out_ready             ),
          .busy_o          ( lane_busy[lane]       )
        );
      end // ADD OTHER OPTIONS HERE

      // Handshakes are only done if the lane is actually used
      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);

      // Properly NaN-box or sign-extend the slice result if not in use
      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;

    // Otherwise generate constant sign-extension
    end else begin : inactive_lane
      assign lane_out_valid[lane] = 1'b0; // unused lane
      assign lane_in_ready[lane]  = 1'b0; // unused lane
      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
      assign lane_status[lane]    = '0;
      assign lane_busy[lane]      = 1'b0;
      assign lane_is_class[lane]  = 1'b0;
    end

    // Insert lane result into slice result
    assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result;

    // Create Classification results
    if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size
      assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane];
      // Zero the remaining upper bits of this lane. Skipped when the lane is exactly 10 bits
      // wide, because the part-select below would then have zero width.
      if (SIMD_WIDTH > 10) begin : pad_true_class
        assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0;
      end
    end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size
      assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF ||
                           lane_class_mask[lane] == fpnew_pkg::NEGNORM ||
                           lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM ||
                           lane_class_mask[lane] == fpnew_pkg::NEGZERO);
      // Write the current block segment
      assign slice_vec_class_result[(lane+1)*8-1:lane*8] = {
        local_sign,  // BIT 7
        ~local_sign, // BIT 6
        lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5
        lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4
        lane_class_mask[lane] == fpnew_pkg::POSZERO
        || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3
        lane_class_mask[lane] == fpnew_pkg::POSSUBNORM
        || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2
        lane_class_mask[lane] == fpnew_pkg::POSNORM
        || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1
        lane_class_mask[lane] == fpnew_pkg::POSINF
        || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0
      };
    end
  end

  // ------------
  // Output Side
  // ------------
  assign result_is_vector = lane_vectorial[0];
  assign result_is_class  = lane_is_class[0];

  // Extend the concatenated lane results to Width using the extension bit
  assign slice_regular_result = $signed({extension_bit_o, slice_result});

  localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8;

  // Pad out unused vec_class bits if each classify result is on 8 bits
  if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin
    if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class
      assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0;
    end
  end

  // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1;

  // Scalar classification returns the class mask of lane 0
  assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0];

  // Select the proper result
  assign result_o = result_is_class ? slice_class_result : slice_regular_result;

  assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused
  assign tag_o           = lane_tags[0];    // upper lanes unused
  assign busy_o          = (| lane_busy);
  assign out_valid_o     = lane_out_valid[0]; // upper lanes unused


  // Collapse the lane status
  always_comb begin : output_processing
    // OR the status of all lanes, ignoring lanes whose mask bit is clear
    automatic fpnew_pkg::status_t temp_status;
    temp_status = '0;
    for (int i = 0; i < int'(NUM_LANES); i++)
      temp_status |= lane_status[i] & {5{lane_masks[i]}};
    status_o = temp_status;
  end
endmodule
|
|
@ -1,449 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
`include "common_cells/registers.svh"
|
||||
|
||||
// Multi-format operation-group slice.
//
// Generates NUM_LANES lanes, each holding one multi-format unit selected by `OpGroup`
// (fpnew_fma_multi for ADDMUL, fpnew_divsqrt_multi for DIVSQRT, fpnew_cast_multi for CONV).
// The destination format, the vectorial flag and the "result is integer" flag travel through the
// units as auxiliary data and select, on the output side, which of the per-format packed results
// is driven onto result_o. Handshake, tag and extension bit are taken from lane 0; the status
// flags of all lanes are OR-ed together, gated by the per-lane mask returned by the units.
//
// NOTE: the NONCOMP branch below is empty, so no unit is instantiated for OpGroup == NONCOMP.
module fpnew_opgroup_multifmt_slice #(
  parameter fpnew_pkg::opgroup_e     OpGroup       = fpnew_pkg::CONV,
  parameter int unsigned             Width         = 64,
  // FPU configuration
  parameter fpnew_pkg::fmt_logic_t   FpFmtConfig   = '1,
  parameter fpnew_pkg::ifmt_logic_t  IntFmtConfig  = '1,
  parameter logic                    EnableVectors = 1'b1,
  parameter int unsigned             NumPipeRegs   = 0,
  parameter fpnew_pkg::pipe_config_t PipeConfig    = fpnew_pkg::BEFORE,
  parameter type                     TagType       = logic,
  // Do not change
  localparam int unsigned NUM_OPERANDS   = fpnew_pkg::num_operands(OpGroup),
  localparam int unsigned NUM_FORMATS    = fpnew_pkg::NUM_FP_FORMATS,
  localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors),
  localparam type         MaskType       = logic [NUM_SIMD_LANES-1:0]
) (
  input logic                                     clk_i,
  input logic                                     rst_ni,
  // Input signals
  input logic [NUM_OPERANDS-1:0][Width-1:0]       operands_i,
  input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
  input fpnew_pkg::roundmode_e                    rnd_mode_i,
  input fpnew_pkg::operation_e                    op_i,
  input logic                                     op_mod_i,
  input fpnew_pkg::fp_format_e                    src_fmt_i,
  input fpnew_pkg::fp_format_e                    dst_fmt_i,
  input fpnew_pkg::int_format_e                   int_fmt_i,
  input logic                                     vectorial_op_i,
  input TagType                                   tag_i,
  input MaskType                                  simd_mask_i,
  // Input Handshake
  input  logic                                    in_valid_i,
  output logic                                    in_ready_o,
  input  logic                                    flush_i,
  // Output signals
  output logic [Width-1:0]                        result_o,
  output fpnew_pkg::status_t                      status_o,
  output logic                                    extension_bit_o,
  output TagType                                  tag_o,
  // Output handshake
  output logic                                    out_valid_o,
  input  logic                                    out_ready_i,
  // Indication of valid data in flight
  output logic                                    busy_o
);

  localparam int unsigned MAX_FP_WIDTH    = fpnew_pkg::max_fp_width(FpFmtConfig);
  localparam int unsigned MAX_INT_WIDTH   = fpnew_pkg::max_int_width(IntFmtConfig);
  // Lanes are always counted as if vectors were enabled; lanes above 0 become inactive_lane
  // blocks when EnableVectors is 0
  localparam int unsigned NUM_LANES       = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
  localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS;
  // We will send the format information along with the data
  localparam int unsigned FMT_BITS =
      fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
  localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags

  logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes
  logic                 vectorial_op;
  logic [FMT_BITS-1:0]  dst_fmt; // destination format to pass along with operation
  logic [AUX_BITS-1:0]  aux_data;

  // additional flags for CONV
  logic       dst_fmt_is_int, dst_is_cpk;
  logic [1:0] dst_vec_op; // info for vectorial results (for packing)
  logic [2:0] target_aux_d, target_aux_q;
  logic       is_up_cast, is_down_cast;

  logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
  logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;
  logic [Width-1:0]                      conv_slice_result;


  logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register

  fpnew_pkg::status_t [NUM_LANES-1:0]   lane_status;
  logic   [NUM_LANES-1:0]               lane_ext_bit; // only the first one is actually used
  TagType [NUM_LANES-1:0]               lane_tags; // only the first one is actually used
  logic   [NUM_LANES-1:0]               lane_masks;
  logic   [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
  logic   [NUM_LANES-1:0]               lane_busy; // ditto

  logic                result_is_vector;
  logic [FMT_BITS-1:0] result_fmt;
  logic                result_fmt_is_int, result_is_cpk;
  logic [1:0]          result_vec_op; // info for vectorial results (for packing)

  logic simd_synch_rdy, simd_synch_done;

  // -----------
  // Input Side
  // -----------
  assign in_ready_o   = lane_in_ready[0]; // Upstream ready is given by first lane
  assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled

  // Cast-and-Pack ops are encoded in operation and modifier
  assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
  assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
                                                          op_i == fpnew_pkg::CPKCD);
  // Select explicitly instead of AND-ing a 1-bit flag with the 2-bit vector: the 1-bit operand
  // would be zero-extended and the CPKCD bit (MSB) would always read as 0
  assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV)
                          ? {(op_i == fpnew_pkg::CPKCD), op_mod_i}
                          : 2'b00;

  assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
  assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));

  // The destination format is the int format for F2I casts
  assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;

  // The data sent along consists of the vectorial flag and format bits
  assign aux_data     = {dst_fmt_is_int, vectorial_op, dst_fmt};
  assign target_aux_d = {dst_vec_op, dst_is_cpk};

  // CONV passes one operand for assembly after the unit: opC for cpk, opB for others
  if (OpGroup == fpnew_pkg::CONV) begin : conv_target
    assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
  end

  // For 2-operand units, prepare boxing info
  logic [NUM_FORMATS-1:0]      is_boxed_1op;
  logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;

  always_comb begin : boxed_2op
    for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++) begin
      is_boxed_1op[fmt] = is_boxed_i[fmt][0];
      is_boxed_2op[fmt] = is_boxed_i[fmt][1:0];
    end
  end

  // ---------------
  // Generate Lanes
  // ---------------
  for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
    localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
    // Get a mask of active formats for this lane
    localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
        fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
    localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
        fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
    localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);

    // Cast-specific parameters
    localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
        fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
    localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
        fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
    localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);

    // Lane parameters from Opgroup
    localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
                                                     ? CONV_FORMATS : ACTIVE_FORMATS;
    localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;

    logic [LANE_WIDTH-1:0] local_result; // lane-local results

    // Generate instances only if needed, lane 0 always generated
    if ((lane == 0) || EnableVectors) begin : active_lane
      logic in_valid, out_valid, out_ready; // lane-local handshake

      logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands;  // lane-local operands
      logic [LANE_WIDTH-1:0]                   op_result;       // lane-local results
      fpnew_pkg::status_t                      op_status;

      assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors

      // Slice out the operands for this lane, upper bits are ignored in the unit
      always_comb begin : prepare_input
        for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
          local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
        end

        // override operand 0 for some conversions
        if (OpGroup == fpnew_pkg::CONV) begin
          // Source is an integer
          if (op_i == fpnew_pkg::I2F) begin
            local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
          // vectorial F2F up casts
          end else if (op_i == fpnew_pkg::F2F) begin
            if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
              // Shift amount is the lane offset plus half the widest format ('+' binds
              // tighter than '>>'; parentheses only make that explicit)
              local_operands[0] = operands_i[0] >> (LANE*fpnew_pkg::fp_width(src_fmt_i) +
                                                    MAX_FP_WIDTH/2);
            end
          // CPK
          end else if (dst_is_cpk) begin
            if (lane == 1) begin
              local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
            end
          end
        end
      end

      // Instantiate the operation from the selected opgroup
      if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
        fpnew_fma_multi #(
          .FpFmtConfig ( LANE_FORMATS         ),
          .NumPipeRegs ( NumPipeRegs          ),
          .PipeConfig  ( PipeConfig           ),
          .TagType     ( TagType              ),
          .AuxType     ( logic [AUX_BITS-1:0] )
        ) i_fpnew_fma_multi (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands  ),
          .is_boxed_i,
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .src_fmt_i,
          .dst_fmt_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]   ),
          .aux_i           ( aux_data            ),
          .in_valid_i      ( in_valid            ),
          .in_ready_o      ( lane_in_ready[lane] ),
          .flush_i,
          .result_o        ( op_result           ),
          .status_o        ( op_status           ),
          .extension_bit_o ( lane_ext_bit[lane]  ),
          .tag_o           ( lane_tags[lane]     ),
          .mask_o          ( lane_masks[lane]    ),
          .aux_o           ( lane_aux[lane]      ),
          .out_valid_o     ( out_valid           ),
          .out_ready_i     ( out_ready           ),
          .busy_o          ( lane_busy[lane]     )
        );

      end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
        fpnew_divsqrt_multi #(
          .FpFmtConfig ( LANE_FORMATS         ),
          .NumPipeRegs ( NumPipeRegs          ),
          .PipeConfig  ( PipeConfig           ),
          .TagType     ( TagType              ),
          .AuxType     ( logic [AUX_BITS-1:0] )
        ) i_fpnew_divsqrt_multi (
          .clk_i,
          .rst_ni,
          .operands_i       ( local_operands[1:0] ), // 2 operands
          .is_boxed_i       ( is_boxed_2op        ), // 2 operands
          .rnd_mode_i,
          .op_i,
          .dst_fmt_i,
          .tag_i,
          .mask_i           ( simd_mask_i[lane]   ),
          .aux_i            ( aux_data            ),
          .in_valid_i       ( in_valid            ),
          .in_ready_o       ( lane_in_ready[lane] ),
          .divsqrt_done_o   ( divsqrt_done[lane]  ),
          .simd_synch_done_i( simd_synch_done     ),
          .divsqrt_ready_o  ( divsqrt_ready[lane] ),
          .simd_synch_rdy_i ( simd_synch_rdy      ),
          .flush_i,
          .result_o         ( op_result           ),
          .status_o         ( op_status           ),
          .extension_bit_o  ( lane_ext_bit[lane]  ),
          .tag_o            ( lane_tags[lane]     ),
          .mask_o           ( lane_masks[lane]    ),
          .aux_o            ( lane_aux[lane]      ),
          .out_valid_o      ( out_valid           ),
          .out_ready_i      ( out_ready           ),
          .busy_o           ( lane_busy[lane]     )
        );
      end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
        // NOTE: intentionally empty here - no multi-format NONCOMP unit is instantiated

      end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
        fpnew_cast_multi #(
          .FpFmtConfig  ( LANE_FORMATS         ),
          .IntFmtConfig ( CONV_INT_FORMATS     ),
          .NumPipeRegs  ( NumPipeRegs          ),
          .PipeConfig   ( PipeConfig           ),
          .TagType      ( TagType              ),
          .AuxType      ( logic [AUX_BITS-1:0] )
        ) i_fpnew_cast_multi (
          .clk_i,
          .rst_ni,
          .operands_i      ( local_operands[0]   ),
          .is_boxed_i      ( is_boxed_1op        ),
          .rnd_mode_i,
          .op_i,
          .op_mod_i,
          .src_fmt_i,
          .dst_fmt_i,
          .int_fmt_i,
          .tag_i,
          .mask_i          ( simd_mask_i[lane]   ),
          .aux_i           ( aux_data            ),
          .in_valid_i      ( in_valid            ),
          .in_ready_o      ( lane_in_ready[lane] ),
          .flush_i,
          .result_o        ( op_result           ),
          .status_o        ( op_status           ),
          .extension_bit_o ( lane_ext_bit[lane]  ),
          .tag_o           ( lane_tags[lane]     ),
          .mask_o          ( lane_masks[lane]    ),
          .aux_o           ( lane_aux[lane]      ),
          .out_valid_o     ( out_valid           ),
          .out_ready_i     ( out_ready           ),
          .busy_o          ( lane_busy[lane]     )
        );
      end // ADD OTHER OPTIONS HERE

      // Handshakes are only done if the lane is actually used
      assign out_ready            = out_ready_i & ((lane == 0) | result_is_vector);
      assign lane_out_valid[lane] = out_valid   & ((lane == 0) | result_is_vector);

      // Properly NaN-box or sign-extend the slice result if not in use
      assign local_result      = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]};
      assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0;

    // Otherwise generate constant sign-extension
    end else begin : inactive_lane
      assign lane_out_valid[lane] = 1'b0; // unused lane
      assign lane_in_ready[lane]  = 1'b0; // unused lane
      assign local_result         = '{default: lane_ext_bit[0]}; // sign-extend/nan box
      assign lane_status[lane]    = '0;
      assign lane_busy[lane]      = 1'b0;
    end

    // Generate result packing depending on float format
    for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
      // Set up some constants
      localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
      // only for active formats within the lane
      if (ACTIVE_FORMATS[fmt]) begin
        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
            local_result[FP_WIDTH-1:0];
      // Inactive format whose lane segment fits entirely into Width: fill with the extension bit
      end else if ((LANE+1)*FP_WIDTH <= Width) begin
        assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
            '{default: lane_ext_bit[LANE]};
      // Inactive format whose lane segment is cut off by Width: fill the part that fits
      end else if (LANE*FP_WIDTH < Width) begin
        assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
            '{default: lane_ext_bit[LANE]};
      end
    end

    // Generate result packing depending on integer format
    if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
      for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
        // Set up some constants
        localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
        if (ACTIVE_INT_FORMATS[ifmt]) begin
          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
              local_result[INT_WIDTH-1:0];
        end else if ((LANE+1)*INT_WIDTH <= Width) begin
          assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
        end else if (LANE*INT_WIDTH < Width) begin
          assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
        end
      end
    end
  end

  // Extend slice result if needed
  for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result
    // Set up some constants
    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
    if (NUM_LANES*FP_WIDTH < Width)
      assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
  end

  // Mute int results if unused
  for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
    if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
      assign ifmt_slice_result[ifmt] = '0;
    end
  end

  // Bypass lanes with target operand for vectorial casts
  // NOTE(review): conv_target_q, result_vec_op and result_is_cpk are produced here but are not
  // consumed by result_o in this module as visible - confirm against the intended packing logic
  if (OpGroup == fpnew_pkg::CONV) begin : target_regs
    // Bypass pipeline signals, index i holds signal after i register stages
    logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
    logic [0:NumPipeRegs][2:0]       byp_pipe_aux_q;
    logic [0:NumPipeRegs]            byp_pipe_valid_q;
    // Ready signal is combinatorial for all stages
    logic [0:NumPipeRegs] byp_pipe_ready;

    // Input stage: First element of pipeline is taken from inputs
    assign byp_pipe_target_q[0]  = conv_target_d;
    assign byp_pipe_aux_q[0]     = target_aux_d;
    assign byp_pipe_valid_q[0]   = in_valid_i & vectorial_op;
    // Generate the register stages
    for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
      // Internal register enable for this stage
      logic reg_ena;
      // Determine the ready signal of the current stage - advance the pipeline:
      // 1. if the next stage is ready for our data
      // 2. if the next stage only holds a bubble (not valid) -> we can pop it
      assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
      // Valid: enabled by ready signal, synchronous clear with the flush signal
      `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
      // Enable register if pipeline ready and a valid data item is present
      assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i];
      // Generate the pipeline registers within the stages, use enable-registers
      `FFL(byp_pipe_target_q[i+1],  byp_pipe_target_q[i],  reg_ena, '0)
      `FFL(byp_pipe_aux_q[i+1],     byp_pipe_aux_q[i],     reg_ena, '0)
    end
    // Output stage: Ready travels backwards from output side, driven by downstream circuitry
    assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
    // Output stage: assign module outputs
    assign conv_target_q = byp_pipe_target_q[NumPipeRegs];

    // decode the aux data
    assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
  end else begin : no_conv
    assign {result_vec_op, result_is_cpk} = '0;
  end

  // Synch lanes if there is more than one
  assign simd_synch_rdy  = EnableVectors ? &divsqrt_ready : divsqrt_ready[0];
  assign simd_synch_done = EnableVectors ? &divsqrt_done  : divsqrt_done[0];

  // ------------
  // Output Side
  // ------------
  // Unpack the auxiliary data returned by lane 0 (same layout as aux_data)
  assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];

  assign result_o = result_fmt_is_int
                    ? ifmt_slice_result[result_fmt]
                    : fmt_slice_result[result_fmt];

  assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
  assign tag_o           = lane_tags[0];    // don't care about upper ones
  assign busy_o          = (| lane_busy);

  assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones

  // Collapse the status
  always_comb begin : output_processing
    // OR the status of all lanes, ignoring lanes whose mask bit is clear
    automatic fpnew_pkg::status_t temp_status;
    temp_status = '0;
    for (int i = 0; i < int'(NUM_LANES); i++)
      temp_status |= lane_status[i] & {5{lane_masks[i]}};
    status_o = temp_status;
  end

endmodule
|
495
vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
vendored
495
vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv
vendored
|
@ -1,495 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
package fpnew_pkg;
|
||||
|
||||
// ---------
|
||||
// FP TYPES
|
||||
// ---------
|
||||
// | Enumerator | Format | Width | EXP_BITS | MAN_BITS
|
||||
// |:----------:|------------------|-------:|:--------:|:--------:
|
||||
// | FP32 | IEEE binary32 | 32 bit | 8 | 23
|
||||
// | FP64 | IEEE binary64 | 64 bit | 11 | 52
|
||||
// | FP16 | IEEE binary16 | 16 bit | 5 | 10
|
||||
// | FP8 | binary8 | 8 bit | 5 | 2
|
||||
// | FP16ALT | binary16alt | 16 bit | 8 | 7
|
||||
// *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
|
||||
|
||||
// Encoding for a format
|
||||
typedef struct packed {
|
||||
int unsigned exp_bits;
|
||||
int unsigned man_bits;
|
||||
} fp_encoding_t;
|
||||
|
||||
localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
|
||||
localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);
|
||||
|
||||
// FP formats
|
||||
typedef enum logic [FP_FORMAT_BITS-1:0] {
|
||||
FP32 = 'd0,
|
||||
FP64 = 'd1,
|
||||
FP16 = 'd2,
|
||||
FP8 = 'd3,
|
||||
FP16ALT = 'd4
|
||||
// add new formats here
|
||||
} fp_format_e;
|
||||
|
||||
// Encodings for supported FP formats
|
||||
localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{
|
||||
'{8, 23}, // IEEE binary32 (single)
|
||||
'{11, 52}, // IEEE binary64 (double)
|
||||
'{5, 10}, // IEEE binary16 (half)
|
||||
'{5, 2}, // custom binary8
|
||||
'{8, 7} // custom binary16alt
|
||||
// add new formats here
|
||||
};
|
||||
|
||||
typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks)
|
||||
typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format
|
||||
|
||||
localparam fmt_logic_t CPK_FORMATS = 5'b11000; // FP32 and FP64 can provide CPK only
|
||||
|
||||
// ---------
|
||||
// INT TYPES
|
||||
// ---------
|
||||
// | Enumerator | Width |
|
||||
// |:----------:|-------:|
|
||||
// | INT8 | 8 bit |
|
||||
// | INT16 | 16 bit |
|
||||
// | INT32 | 32 bit |
|
||||
// | INT64 | 64 bit |
|
||||
// *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
|
||||
|
||||
localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats
|
||||
localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS);
|
||||
|
||||
// Int formats
|
||||
typedef enum logic [INT_FORMAT_BITS-1:0] {
|
||||
INT8,
|
||||
INT16,
|
||||
INT32,
|
||||
INT64
|
||||
// add new formats here
|
||||
} int_format_e;
|
||||
|
||||
// Returns the width of an INT format by index
|
||||
function automatic int unsigned int_width(int_format_e ifmt);
  // Maps an integer-format enumerator to its width in bits.
  automatic int unsigned width;
  unique case (ifmt)
    INT8:  width = 8;
    INT16: width = 16;
    INT32: width = 32;
    INT64: width = 64;
    default: begin
      // Not reachable with a well-formed ifmt; abort when simulating.
      // pragma translate_off
      $fatal(1, "Invalid INT format supplied");
      // pragma translate_on
      // Assign something on this path too so no latch is inferred. INT8 is the
      // first enumerator of int_format_e, i.e. numerically 0.
      width = INT8;
    end
  endcase
  return width;
endfunction
|
||||
|
||||
typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks)
|
||||
|
||||
// --------------
|
||||
// FP OPERATIONS
|
||||
// --------------
|
||||
localparam int unsigned NUM_OPGROUPS = 4;
|
||||
|
||||
// Each FP operation belongs to an operation group
|
||||
typedef enum logic [1:0] {
|
||||
ADDMUL, DIVSQRT, NONCOMP, CONV
|
||||
} opgroup_e;
|
||||
|
||||
localparam int unsigned OP_BITS = 4;
|
||||
|
||||
typedef enum logic [OP_BITS-1:0] {
|
||||
FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group
|
||||
DIV, SQRT, // DIVSQRT operation group
|
||||
SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group
|
||||
F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group
|
||||
} operation_e;
|
||||
|
||||
// -------------------
|
||||
// RISC-V FP-SPECIFIC
|
||||
// -------------------
|
||||
// Rounding modes
|
||||
typedef enum logic [2:0] {
|
||||
RNE = 3'b000,
|
||||
RTZ = 3'b001,
|
||||
RDN = 3'b010,
|
||||
RUP = 3'b011,
|
||||
RMM = 3'b100,
|
||||
ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC
|
||||
DYN = 3'b111
|
||||
} roundmode_e;
|
||||
|
||||
// Status flags
|
||||
typedef struct packed {
|
||||
logic NV; // Invalid
|
||||
logic DZ; // Divide by zero
|
||||
logic OF; // Overflow
|
||||
logic UF; // Underflow
|
||||
logic NX; // Inexact
|
||||
} status_t;
|
||||
|
||||
// Information about a floating point value
|
||||
typedef struct packed {
|
||||
logic is_normal; // is the value normal
|
||||
logic is_subnormal; // is the value subnormal
|
||||
logic is_zero; // is the value zero
|
||||
logic is_inf; // is the value infinity
|
||||
logic is_nan; // is the value NaN
|
||||
logic is_signalling; // is the value a signalling NaN
|
||||
logic is_quiet; // is the value a quiet NaN
|
||||
logic is_boxed; // is the value properly NaN-boxed (RISC-V specific)
|
||||
} fp_info_t;
|
||||
|
||||
// Classification mask
|
||||
typedef enum logic [9:0] {
|
||||
NEGINF = 10'b00_0000_0001,
|
||||
NEGNORM = 10'b00_0000_0010,
|
||||
NEGSUBNORM = 10'b00_0000_0100,
|
||||
NEGZERO = 10'b00_0000_1000,
|
||||
POSZERO = 10'b00_0001_0000,
|
||||
POSSUBNORM = 10'b00_0010_0000,
|
||||
POSNORM = 10'b00_0100_0000,
|
||||
POSINF = 10'b00_1000_0000,
|
||||
SNAN = 10'b01_0000_0000,
|
||||
QNAN = 10'b10_0000_0000
|
||||
} classmask_e;
|
||||
|
||||
// ------------------
|
||||
// FPU configuration
|
||||
// ------------------
|
||||
// Pipelining registers can be inserted (at elaboration time) into operational units
|
||||
typedef enum logic [1:0] {
|
||||
BEFORE, // registers are inserted at the inputs of the unit
|
||||
AFTER, // registers are inserted at the outputs of the unit
|
||||
INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit
|
||||
DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE
|
||||
} pipe_config_t;
|
||||
|
||||
// Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all.
|
||||
typedef enum logic [1:0] {
|
||||
DISABLED, // arithmetic units are not generated
|
||||
PARALLEL, // arithmetic units are generated in prallel slices, one for each format
|
||||
MERGED // arithmetic units are contained within a merged unit holding multiple formats
|
||||
} unit_type_t;
|
||||
|
||||
// Array of unit types indexed by format
|
||||
typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t;
|
||||
|
||||
// Array of format-specific unit types by opgroup
|
||||
typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t;
|
||||
// same with unsigned
|
||||
typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t;
|
||||
|
||||
// FPU configuration: features
|
||||
typedef struct packed {
|
||||
int unsigned Width;
|
||||
logic EnableVectors;
|
||||
logic EnableNanBox;
|
||||
fmt_logic_t FpFmtMask;
|
||||
ifmt_logic_t IntFmtMask;
|
||||
} fpu_features_t;
|
||||
|
||||
localparam fpu_features_t RV64D = '{
|
||||
Width: 64,
|
||||
EnableVectors: 1'b0,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b11000,
|
||||
IntFmtMask: 4'b0011
|
||||
};
|
||||
|
||||
localparam fpu_features_t RV32D = '{
|
||||
Width: 64,
|
||||
EnableVectors: 1'b1,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b11000,
|
||||
IntFmtMask: 4'b0010
|
||||
};
|
||||
|
||||
localparam fpu_features_t RV32F = '{
|
||||
Width: 32,
|
||||
EnableVectors: 1'b0,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b10000,
|
||||
IntFmtMask: 4'b0010
|
||||
};
|
||||
|
||||
localparam fpu_features_t RV64D_Xsflt = '{
|
||||
Width: 64,
|
||||
EnableVectors: 1'b1,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b11111,
|
||||
IntFmtMask: 4'b1111
|
||||
};
|
||||
|
||||
localparam fpu_features_t RV32F_Xsflt = '{
|
||||
Width: 32,
|
||||
EnableVectors: 1'b1,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b10111,
|
||||
IntFmtMask: 4'b1110
|
||||
};
|
||||
|
||||
localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{
|
||||
Width: 32,
|
||||
EnableVectors: 1'b1,
|
||||
EnableNanBox: 1'b1,
|
||||
FpFmtMask: 5'b10001,
|
||||
IntFmtMask: 4'b0110
|
||||
};
|
||||
|
||||
|
||||
// FPU configuration: implementation
|
||||
typedef struct packed {
|
||||
opgrp_fmt_unsigned_t PipeRegs;
|
||||
opgrp_fmt_unit_types_t UnitTypes;
|
||||
pipe_config_t PipeConfig;
|
||||
} fpu_implementation_t;
|
||||
|
||||
localparam fpu_implementation_t DEFAULT_NOREGS = '{
|
||||
PipeRegs: '{default: 0},
|
||||
UnitTypes: '{'{default: PARALLEL}, // ADDMUL
|
||||
'{default: MERGED}, // DIVSQRT
|
||||
'{default: PARALLEL}, // NONCOMP
|
||||
'{default: MERGED}}, // CONV
|
||||
PipeConfig: BEFORE
|
||||
};
|
||||
|
||||
localparam fpu_implementation_t DEFAULT_SNITCH = '{
|
||||
PipeRegs: '{default: 1},
|
||||
UnitTypes: '{'{default: PARALLEL}, // ADDMUL
|
||||
'{default: DISABLED}, // DIVSQRT
|
||||
'{default: PARALLEL}, // NONCOMP
|
||||
'{default: MERGED}}, // CONV
|
||||
PipeConfig: BEFORE
|
||||
};
|
||||
|
||||
// -----------------------
|
||||
// Synthesis optimization
|
||||
// -----------------------
|
||||
localparam logic DONT_CARE = 1'b1; // the value to assign as don't care
|
||||
|
||||
// -------------------------
|
||||
// General helper functions
|
||||
// -------------------------
|
||||
function automatic int minimum(int a, int b);
  // Smaller of two signed integers.
  if (a < b) begin
    return a;
  end
  return b;
endfunction
|
||||
|
||||
function automatic int maximum(int a, int b);
  // Larger of two signed integers.
  if (a > b) begin
    return a;
  end
  return b;
endfunction
|
||||
|
||||
// -------------------------------------------
|
||||
// Helper functions for FP formats and values
|
||||
// -------------------------------------------
|
||||
// Returns the width of a FP format
|
||||
function automatic int unsigned fp_width(fp_format_e fmt);
  // Total storage width of a format: exponent bits + mantissa bits + 1 sign bit.
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.exp_bits + enc.man_bits + 1;
endfunction
|
||||
|
||||
// Returns the widest FP format present
|
||||
function automatic int unsigned max_fp_width(fmt_logic_t cfg);
  // Width in bits of the widest format enabled in cfg (0 if cfg enables none).
  automatic int unsigned widest = 0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin
      widest = unsigned'(maximum(widest, fp_width(fp_format_e'(idx))));
    end
  end
  return widest;
endfunction
|
||||
|
||||
// Returns the narrowest FP format present
|
||||
function automatic int unsigned min_fp_width(fmt_logic_t cfg);
  // Width in bits of the narrowest format enabled in cfg.
  // The search starts from the widest enabled width so that any enabled format
  // can only lower it; with no format enabled the result stays at that start value.
  automatic int unsigned narrowest = max_fp_width(cfg);
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin
      narrowest = unsigned'(minimum(narrowest, fp_width(fp_format_e'(idx))));
    end
  end
  return narrowest;
endfunction
|
||||
|
||||
// Returns the number of exponent bits for a format
|
||||
function automatic int unsigned exp_bits(fp_format_e fmt);
  // Looks up the exponent field width of fmt in the FP_ENCODINGS table.
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.exp_bits;
endfunction
|
||||
|
||||
// Returns the number of mantissa bits for a format
|
||||
function automatic int unsigned man_bits(fp_format_e fmt);
  // Looks up the mantissa field width of fmt in the FP_ENCODINGS table.
  automatic fp_encoding_t enc = FP_ENCODINGS[fmt];
  return enc.man_bits;
endfunction
|
||||
|
||||
// Returns the bias value for a given format (as per IEEE 754-2008)
|
||||
function automatic int unsigned bias(fp_format_e fmt);
  // Symmetrical exponent bias: 2^(exp_bits-1) - 1.
  automatic int unsigned e = FP_ENCODINGS[fmt].exp_bits;
  return unsigned'(2**(e-1)-1);
endfunction
|
||||
|
||||
function automatic fp_encoding_t super_format(fmt_logic_t cfg);
  // Builds an encoding whose exponent and mantissa fields are each as wide as
  // the widest corresponding field among the formats enabled in cfg.
  automatic fp_encoding_t merged;
  merged = '0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx]) begin  // disabled formats do not contribute
      merged.exp_bits = unsigned'(maximum(merged.exp_bits, FP_ENCODINGS[idx].exp_bits));
      merged.man_bits = unsigned'(maximum(merged.man_bits, FP_ENCODINGS[idx].man_bits));
    end
  end
  return merged;
endfunction
|
||||
|
||||
// -------------------------------------------
|
||||
// Helper functions for INT formats and values
|
||||
// -------------------------------------------
|
||||
// Returns the widest INT format present
|
||||
function automatic int unsigned max_int_width(ifmt_logic_t cfg);
  // Width in bits of the widest integer format enabled in cfg (0 if none).
  automatic int unsigned res = 0;
  // Unsigned index: NUM_INT_FORMATS is unsigned, so this avoids a mixed-sign compare.
  for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin
    if (cfg[ifmt]) begin
      // maximum() works on signed int; cast back explicitly, as max_fp_width does.
      res = unsigned'(maximum(res, int_width(int_format_e'(ifmt))));
    end
  end
  return res;
endfunction
|
||||
|
||||
// --------------------------------------------------
|
||||
// Helper functions for operations and FPU structure
|
||||
// --------------------------------------------------
|
||||
// Returns the operation group of the given operation
|
||||
function automatic opgroup_e get_opgroup(operation_e op);
  // Classifies an operation into its operation group.
  automatic opgroup_e grp;
  unique case (op)
    F2F, F2I, I2F, CPKAB, CPKCD: grp = CONV;
    SGNJ, MINMAX, CMP, CLASSIFY: grp = NONCOMP;
    DIV, SQRT:                   grp = DIVSQRT;
    FMADD, FNMSUB, ADD, MUL:     grp = ADDMUL;
    default:                     grp = NONCOMP;  // any other encoding
  endcase
  return grp;
endfunction
|
||||
|
||||
// Returns the number of operands by operation group
|
||||
function automatic int unsigned num_operands(opgroup_e grp);
  // Number of operands consumed by an operation group.
  automatic int unsigned count;
  unique case (grp)
    CONV:    count = 3;  // vectorial casts use 3 operands
    ADDMUL:  count = 3;
    NONCOMP: count = 2;
    DIVSQRT: count = 2;
    default: count = 0;
  endcase
  return count;
endfunction
|
||||
|
||||
// Returns the number of lanes according to width, format and vectors
|
||||
function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec);
  // How many values of format fmt fit into width bits; a single lane when
  // vectors are disabled.
  automatic int unsigned vec_lanes = width / fp_width(fmt);
  return vec ? vec_lanes : 1;
endfunction
|
||||
|
||||
// Returns the maximum number of lanes in the FPU according to width, format config and vectors
|
||||
function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec);
  // Largest lane count in the FPU: width divided by the narrowest enabled
  // format, or a single lane when vectors are disabled.
  automatic int unsigned narrowest = min_fp_width(cfg);
  // min_fp_width() yields 0 when cfg enables no format at all; dividing by it
  // would produce an undefined lane count, so fall back to one lane.
  if (narrowest == 0) begin
    return 1;
  end
  return vec ? width / narrowest : 1; // if no vectors, only one lane
endfunction
|
||||
|
||||
// Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice
|
||||
function automatic fmt_logic_t get_lane_formats(int unsigned width,
                                                fmt_logic_t cfg,
                                                int unsigned lane_no);
  // Per-format mask: a bit is set when the format is enabled in cfg and lane
  // lane_no exists for it, i.e. more than lane_no values of it fit into width.
  automatic fmt_logic_t res;
  automatic int unsigned fmt_lanes;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    fmt_lanes = width / fp_width(fp_format_e'(idx));
    res[idx]  = cfg[idx] & (fmt_lanes > lane_no);
  end
  return res;
endfunction
|
||||
|
||||
// Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice
|
||||
function automatic ifmt_logic_t get_lane_int_formats(int unsigned width,
                                                     fmt_logic_t cfg,
                                                     ifmt_logic_t icfg,
                                                     int unsigned lane_no);
  // Per-int-format mask for lane lane_no: an integer format is present when it
  // is enabled in icfg and an FP format of the same width is active in the lane.
  automatic ifmt_logic_t res;
  automatic fmt_logic_t  lanefmts;
  automatic int unsigned iwidth;
  res      = '0;
  lanefmts = get_lane_formats(width, cfg, lane_no);

  for (int unsigned ii = 0; ii < NUM_INT_FORMATS; ii++) begin
    iwidth = int_width(int_format_e'(ii));
    for (int unsigned fi = 0; fi < NUM_FP_FORMATS; fi++) begin
      // Only FP formats of matching width can pair with this int format
      if (fp_width(fp_format_e'(fi)) == iwidth) begin
        res[ii] |= icfg[ii] && lanefmts[fi];
      end
    end
  end
  return res;
endfunction
|
||||
|
||||
// Returns a mask of active FP formats that are present in lane lane_no of a CONV slice
|
||||
function automatic fmt_logic_t get_conv_lane_formats(int unsigned width,
                                                     fmt_logic_t cfg,
                                                     int unsigned lane_no);
  // Per-format mask for lane lane_no of a CONV slice. An enabled format is
  // present when the lane exists for it, or when it is flagged in CPK_FORMATS
  // and lane_no is one of the first two lanes.
  automatic fmt_logic_t res;
  automatic logic has_lane, cpk_lane;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    has_lane = (width / fp_width(fp_format_e'(idx))) > lane_no;
    cpk_lane = CPK_FORMATS[idx] && (lane_no < 2);
    res[idx] = cfg[idx] && (has_lane || cpk_lane);
  end
  return res;
endfunction
|
||||
|
||||
// Returns a mask of active INT formats that are present in lane lane_no of a CONV slice
|
||||
function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width,
                                                          fmt_logic_t cfg,
                                                          ifmt_logic_t icfg,
                                                          int unsigned lane_no);
  // Per-int-format mask for lane lane_no of a CONV slice: an integer format is
  // present when it is enabled in icfg and an FP format of the same width is
  // active in that CONV lane.
  automatic ifmt_logic_t res;
  automatic fmt_logic_t  lanefmts;
  automatic int unsigned iwidth;
  res      = '0;
  lanefmts = get_conv_lane_formats(width, cfg, lane_no);

  for (int unsigned ii = 0; ii < NUM_INT_FORMATS; ii++) begin
    iwidth = int_width(int_format_e'(ii));
    for (int unsigned fi = 0; fi < NUM_FP_FORMATS; fi++) begin
      // Pair int formats with active FP formats of equal width
      res[ii] |= icfg[ii] && lanefmts[fi] && (fp_width(fp_format_e'(fi)) == iwidth);
    end
  end
  return res;
endfunction
|
||||
|
||||
// Return whether any active format is set as MERGED
|
||||
function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
  // 1 when at least one format enabled in cfg has unit type MERGED, else 0.
  automatic logic found = 1'b0;
  for (int unsigned idx = 0; idx < NUM_FP_FORMATS; idx++) begin
    if (cfg[idx] && (types[idx] == MERGED)) begin
      found = 1'b1;
    end
  end
  return found;
endfunction
|
||||
|
||||
// Return whether the given format is the first active one set as MERGED
|
||||
function automatic logic is_first_enabled_multi(fp_format_e fmt,
                                                fmt_unit_types_t types,
                                                fmt_logic_t cfg);
  // 1 when fmt is the lowest-indexed format that is both enabled in cfg and of
  // unit type MERGED; 0 otherwise, including when no such format exists.
  automatic logic is_first = 1'b0;
  // Walk from the highest index down: each match overwrites the answer, so the
  // value left at the end belongs to the lowest matching index.
  for (int idx = int'(NUM_FP_FORMATS) - 1; idx >= 0; idx--) begin
    if (cfg[idx] && (types[idx] == MERGED)) begin
      is_first = (fp_format_e'(idx) == fmt);
    end
  end
  return is_first;
endfunction
|
||||
|
||||
// Returns the first format that is active and is set as MERGED
|
||||
function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg);
  // Lowest-indexed format that is enabled in cfg and of unit type MERGED.
  // Falls back to format index 0 when there is none.
  automatic fp_format_e first = fp_format_e'(0);
  // Descending scan: the last assignment made is the one for the lowest index.
  for (int idx = int'(NUM_FP_FORMATS) - 1; idx >= 0; idx--) begin
    if (cfg[idx] && (types[idx] == MERGED)) begin
      first = fp_format_e'(idx);
    end
  end
  return first;
endfunction
|
||||
|
||||
// Returns the largest number of regs that is active and is set as MERGED
|
||||
function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs,
                                                   fmt_unit_types_t types,
                                                   fmt_logic_t cfg);
  // Largest regs[] entry among the formats that are enabled in cfg and of unit
  // type MERGED; 0 when there is no such format.
  automatic int unsigned res = 0;
  for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin
    if (cfg[i] && types[i] == MERGED) begin
      // maximum() works on signed int; cast back explicitly, as the other
      // width helpers in this package do.
      res = unsigned'(maximum(res, regs[i]));
    end
  end
  return res;
endfunction
|
||||
|
||||
endpackage
|
76
vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
vendored
76
vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv
vendored
|
@ -1,76 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
module fpnew_rounding #(
  parameter int unsigned AbsWidth=2 // Width of the absolute value, without sign bit
) (
  // Input value
  input logic [AbsWidth-1:0]   abs_value_i,             // absolute value without sign
  input logic                  sign_i,
  // Rounding information
  input logic [1:0]            round_sticky_bits_i,     // round and sticky bits {RS}
  input fpnew_pkg::roundmode_e rnd_mode_i,
  input logic                  effective_subtraction_i, // sign of inputs affects rounding of zeroes
  // Output value
  output logic [AbsWidth-1:0]  abs_rounded_o,           // absolute value without sign
  output logic                 sign_o,
  // Output classification
  output logic                 exact_zero_o             // output is an exact zero
);

  logic round_up;          // Rounding decision: add one ulp to the magnitude
  logic inexact;           // At least one of the round/sticky bits is set
  logic cancelled_to_zero; // Exact zero produced by an effective subtraction

  assign inexact = |round_sticky_bits_i;

  // Rounding decision per mode
  //   RoundMode | Mnemonic | Meaning
  //   :--------:|:--------:|:-------
  //      000    |   RNE    | Round to Nearest, ties to Even
  //      001    |   RTZ    | Round towards Zero
  //      010    |   RDN    | Round Down (towards -\infty)
  //      011    |   RUP    | Round Up (towards \infty)
  //      100    |   RMM    | Round to Nearest, ties to Max Magnitude
  //      101    |   ROD    | Round towards odd (this mode is not defined in RISC-V FP-SPEC)
  //     others  |          | *invalid*
  always_comb begin : rounding_decision
    unique case (rnd_mode_i)
      // Truncate: never increment the magnitude
      fpnew_pkg::RTZ: round_up = 1'b0;
      // Towards -inf: only inexact negative values grow in magnitude
      fpnew_pkg::RDN: round_up = inexact & sign_i;
      // Towards +inf: only inexact positive values grow in magnitude
      fpnew_pkg::RUP: round_up = inexact & ~sign_i;
      // Nearest, ties away from zero: the round bit alone decides
      fpnew_pkg::RMM: round_up = round_sticky_bits_i[1];
      // To odd: an inexact value with an even LSB is bumped to the odd neighbour
      fpnew_pkg::ROD: round_up = inexact & ~abs_value_i[0];
      // Nearest, ties to even: decide according to the round/sticky bits
      fpnew_pkg::RNE: begin
        unique case (round_sticky_bits_i)
          2'b11:   round_up = 1'b1;           // > ulp/2 away, round up
          2'b10:   round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
          2'b00,
          2'b01:   round_up = 1'b0;           // < ulp/2 away, round down
          default: round_up = fpnew_pkg::DONT_CARE;
        endcase
      end
      default: round_up = fpnew_pkg::DONT_CARE; // invalid rounding mode
    endcase
  end

  // The increment is applied to the whole magnitude in a single addition.
  assign abs_rounded_o = abs_value_i + round_up;

  // A true zero has a zero magnitude and no dirty round/sticky bits
  assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0);

  // An exact zero that came out of an effective subtraction (operand signs
  // differed) is negative under RDN and positive under every other mode.
  // All other results keep the incoming sign.
  assign cancelled_to_zero = exact_zero_o && effective_subtraction_i;
  assign sign_o            = cancelled_to_zero ? (rnd_mode_i == fpnew_pkg::RDN) : sign_i;

endmodule
|
185
vendor/openhwgroup/cvfpu/src/fpnew_top.sv
vendored
185
vendor/openhwgroup/cvfpu/src/fpnew_top.sv
vendored
|
@ -1,185 +0,0 @@
|
|||
// Copyright 2019 ETH Zurich and University of Bologna.
|
||||
//
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// SPDX-License-Identifier: SHL-0.51
|
||||
|
||||
// Author: Stefan Mach <smach@iis.ee.ethz.ch>
|
||||
|
||||
module fpnew_top #(
  // FPU configuration
  parameter fpnew_pkg::fpu_features_t       Features       = fpnew_pkg::RV64D_Xsflt,
  parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS,
  parameter type                            TagType        = logic,
  parameter int unsigned                    TrueSIMDClass  = 0,
  parameter int unsigned                    EnableSIMDMask = 0,
  // Do not change
  localparam int unsigned NumLanes     = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors),
  localparam type         MaskType     = logic [NumLanes-1:0],
  localparam int unsigned WIDTH        = Features.Width,
  localparam int unsigned NUM_OPERANDS = 3
) (
  input  logic                                    clk_i,
  input  logic                                    rst_ni,
  // Input signals
  input  logic [NUM_OPERANDS-1:0][WIDTH-1:0]      operands_i,
  input  fpnew_pkg::roundmode_e                   rnd_mode_i,
  input  fpnew_pkg::operation_e                   op_i,
  input  logic                                    op_mod_i,
  input  fpnew_pkg::fp_format_e                   src_fmt_i,
  input  fpnew_pkg::fp_format_e                   dst_fmt_i,
  input  fpnew_pkg::int_format_e                  int_fmt_i,
  input  logic                                    vectorial_op_i,
  input  TagType                                  tag_i,
  input  MaskType                                 simd_mask_i,
  // Input Handshake
  input  logic                                    in_valid_i,
  output logic                                    in_ready_o,
  input  logic                                    flush_i,
  // Output signals
  output logic [WIDTH-1:0]                        result_o,
  output fpnew_pkg::status_t                      status_o,
  output TagType                                  tag_o,
  // Output handshake
  output logic                                    out_valid_o,
  input  logic                                    out_ready_i,
  // Indication of valid data in flight
  output logic                                    busy_o
);

  localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS;
  localparam int unsigned NUM_FORMATS  = fpnew_pkg::NUM_FP_FORMATS;

  // ----------------
  // Type Definition
  // ----------------
  // Everything one operation group hands to the output arbiter
  typedef struct packed {
    logic [WIDTH-1:0]   result;
    fpnew_pkg::status_t status;
    TagType             tag;
  } output_t;

  // Per-opgroup handshake and status signals
  logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy;
  output_t [NUM_OPGROUPS-1:0] opgrp_outputs;

  // NaN-boxing check result per format and operand
  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed;

  // SIMD mask after optional filtering, and the arbitrated output bundle
  MaskType simd_mask;
  output_t arbiter_output;

  // Operation group addressed by the incoming operation
  fpnew_pkg::opgroup_e sel_opgrp;
  assign sel_opgrp = fpnew_pkg::get_opgroup(op_i);

  // -----------
  // Input Side
  // -----------
  // Ready is reported only while a valid request is presented, and comes from
  // the operation group that request addresses.
  assign in_ready_o = in_valid_i & opgrp_in_ready[sel_opgrp];

  // NaN-boxing check
  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check
    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
    // The check only exists when enabled and when the format is narrower than the datapath
    if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check
      for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands
        // Vectorial operands always count as boxed; scalar operands must carry
        // all-ones in the bits above the format width.
        assign is_boxed[fmt][op] = vectorial_op_i
                                   ? 1'b1
                                   : (operands_i[op][WIDTH-1:FP_WIDTH] == '1);
      end
    end else begin : no_check
      assign is_boxed[fmt] = '1;
    end
  end

  // Filter out the mask if not used: when the EnableSIMDMask bit is 0 every
  // lane is forced active, otherwise simd_mask_i is passed through.
  assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}};

  // -------------------------
  // Generate Operation Blocks
  // -------------------------
  for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups
    localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp));

    logic in_valid;
    logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed;

    // A request is valid for this block only when its operation maps to this group
    assign in_valid = in_valid_i & (sel_opgrp == fpnew_pkg::opgroup_e'(opgrp));

    // Keep only the boxing flags of the operands this group actually uses
    always_comb begin : slice_inputs
      for (int unsigned f = 0; f < NUM_FORMATS; f++) begin
        input_boxed[f] = is_boxed[f][NUM_OPS-1:0];
      end
    end

    fpnew_opgroup_block #(
      .OpGroup       ( fpnew_pkg::opgroup_e'(opgrp)    ),
      .Width         ( WIDTH                           ),
      .EnableVectors ( Features.EnableVectors          ),
      .FpFmtMask     ( Features.FpFmtMask              ),
      .IntFmtMask    ( Features.IntFmtMask             ),
      .FmtPipeRegs   ( Implementation.PipeRegs[opgrp]  ),
      .FmtUnitTypes  ( Implementation.UnitTypes[opgrp] ),
      .PipeConfig    ( Implementation.PipeConfig       ),
      .TagType       ( TagType                         ),
      .TrueSIMDClass ( TrueSIMDClass                   )
    ) i_opgroup_block (
      .clk_i           ( clk_i                        ),
      .rst_ni          ( rst_ni                       ),
      .operands_i      ( operands_i[NUM_OPS-1:0]      ),
      .is_boxed_i      ( input_boxed                  ),
      .rnd_mode_i      ( rnd_mode_i                   ),
      .op_i            ( op_i                         ),
      .op_mod_i        ( op_mod_i                     ),
      .src_fmt_i       ( src_fmt_i                    ),
      .dst_fmt_i       ( dst_fmt_i                    ),
      .int_fmt_i       ( int_fmt_i                    ),
      .vectorial_op_i  ( vectorial_op_i               ),
      .tag_i           ( tag_i                        ),
      .simd_mask_i     ( simd_mask                    ),
      .in_valid_i      ( in_valid                     ),
      .in_ready_o      ( opgrp_in_ready[opgrp]        ),
      .flush_i         ( flush_i                      ),
      .result_o        ( opgrp_outputs[opgrp].result  ),
      .status_o        ( opgrp_outputs[opgrp].status  ),
      .extension_bit_o ( opgrp_ext[opgrp]             ),
      .tag_o           ( opgrp_outputs[opgrp].tag     ),
      .out_valid_o     ( opgrp_out_valid[opgrp]       ),
      .out_ready_i     ( opgrp_out_ready[opgrp]       ),
      .busy_o          ( opgrp_busy[opgrp]            )
    );
  end

  // ------------------
  // Arbitrate Outputs
  // ------------------
  // Round-Robin arbiter to decide which result to use
  rr_arb_tree #(
    .NumIn     ( NUM_OPGROUPS ),
    .DataType  ( output_t     ),
    .AxiVldRdy ( 1'b1         )
  ) i_arbiter (
    .clk_i   ( clk_i           ),
    .rst_ni  ( rst_ni          ),
    .flush_i ( flush_i         ),
    .rr_i    ( '0              ),
    .req_i   ( opgrp_out_valid ),
    .gnt_o   ( opgrp_out_ready ),
    .data_i  ( opgrp_outputs   ),
    .gnt_i   ( out_ready_i     ),
    .req_o   ( out_valid_o     ),
    .data_o  ( arbiter_output  ),
    .idx_o   ( /* unused */    )
  );

  // Unpack output
  assign result_o = arbiter_output.result;
  assign status_o = arbiter_output.status;
  assign tag_o    = arbiter_output.tag;

  // The FPU is busy while any operation group is
  assign busy_o = |opgrp_busy;

endmodule
|
|
@ -1,25 +0,0 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||
|
||||
In this sense, we interpret the "Public API" of a hardware module as its port/parameter list.
|
||||
Versions of the IP in the same major release are "pin-compatible" with each other. Minor releases are permitted to add new parameters as long as their default bindings ensure backwards compatibility.
|
||||
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
### Changed
|
||||
### Fixed
|
||||
|
||||
|
||||
## [1.0.4] - 2020-10-05
|
||||
|
||||
### Added
|
||||
- Started formal changelog
|
||||
|
||||
### Changed
|
||||
### Fixed
|
||||
- Signalling behavior for underflows to 0.0
|
|
@ -1,176 +0,0 @@
|
|||
SOLDERPAD HARDWARE LICENSE version 0.51
|
||||
|
||||
This license is based closely on the Apache License Version 2.0, but is not
|
||||
approved or endorsed by the Apache Foundation. A copy of the non-modified
|
||||
Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
|
||||
As this license is not currently OSI or FSF approved, the Licensor permits any
|
||||
Work licensed under this License, at the option of the Licensee, to be treated
|
||||
as licensed under the Apache License Version 2.0 (which is so approved).
|
||||
|
||||
This License is licensed under the terms of this License and in particular
|
||||
clause 7 below (Disclaimer of Warranties) applies in relation to its use.
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, and
|
||||
distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the Rights owner or entity authorized by the Rights owner
|
||||
that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all other entities
|
||||
that control, are controlled by, or are under common control with that entity.
|
||||
For the purposes of this definition, "control" means (i) the power, direct or
|
||||
indirect, to cause the direction or management of such entity, whether by
|
||||
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity exercising
|
||||
permissions granted by this License.
|
||||
|
||||
"Rights" means copyright and any similar right including design right (whether
|
||||
registered or unregistered), semiconductor topography (mask) rights and
|
||||
database rights (but excluding Patents and Trademarks).
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications, including
|
||||
but not limited to source code, net lists, board layouts, CAD files,
|
||||
documentation source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical transformation or
|
||||
translation of a Source form, including but not limited to compiled object
|
||||
code, generated documentation, the instantiation of a hardware design and
|
||||
conversions to other media types, including intermediate forms such as
|
||||
bytecodes, FPGA bitstreams, artwork and semiconductor topographies (mask
|
||||
works).
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source form or other
|
||||
Object form, made available under the License, as indicated by a Rights notice
|
||||
that is included in or attached to the work (an example is provided in the
|
||||
Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object form, that
|
||||
is based on (or derived from) the Work and for which the editorial revisions,
|
||||
annotations, elaborations, or other modifications represent, as a whole, an
|
||||
original work of authorship. For the purposes of this License, Derivative Works
|
||||
shall not include works that remain separable from, or merely link (or bind by
|
||||
name) or physically connect to or interoperate with the interfaces of, the Work
|
||||
and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any design or work of authorship, including the
|
||||
original version of the Work and any modifications or additions to that Work or
|
||||
Derivative Works thereof, that is intentionally submitted to Licensor for
|
||||
inclusion in the Work by the Rights owner or by an individual or Legal Entity
|
||||
authorized to submit on behalf of the Rights owner. For the purposes of this
|
||||
definition, "submitted" means any form of electronic, verbal, or written
|
||||
communication sent to the Licensor or its representatives, including but not
|
||||
limited to communication on electronic mailing lists, source code control
|
||||
systems, and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but excluding
|
||||
communication that is conspicuously marked or otherwise designated in writing
|
||||
by the Rights owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
|
||||
of whom a Contribution has been received by Licensor and subsequently
|
||||
incorporated within the Work.
|
||||
|
||||
2. Grant of License. Subject to the terms and conditions of this License, each
|
||||
Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable license under the Rights to reproduce,
|
||||
prepare Derivative Works of, publicly display, publicly perform, sublicense,
|
||||
and distribute the Work and such Derivative Works in Source or Object form and
|
||||
do anything in relation to the Work as if the Rights did not exist.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of this
|
||||
License, each Contributor hereby grants to You a perpetual, worldwide,
|
||||
non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
|
||||
section) patent license to make, have made, use, offer to sell, sell, import,
|
||||
and otherwise transfer the Work, where such license applies only to those
|
||||
patent claims licensable by such Contributor that are necessarily infringed by
|
||||
their Contribution(s) alone or by combination of their Contribution(s) with the
|
||||
Work to which such Contribution(s) was submitted. If You institute patent
|
||||
litigation against any entity (including a cross-claim or counterclaim in a
|
||||
lawsuit) alleging that the Work or a Contribution incorporated within the Work
|
||||
constitutes direct or contributory patent infringement, then any patent
|
||||
licenses granted to You under this License for that Work shall terminate as of
|
||||
the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the Work or
|
||||
Derivative Works thereof in any medium, with or without modifications, and in
|
||||
Source or Object form, provided that You meet the following conditions:
|
||||
|
||||
You must give any other recipients of the Work or Derivative Works a copy
|
||||
of this License; and
|
||||
|
||||
You must cause any modified files to carry prominent notices stating that
|
||||
You changed the files; and
|
||||
|
||||
You must retain, in the Source form of any Derivative Works that You
|
||||
distribute, all copyright, patent, trademark, and attribution notices from
|
||||
the Source form of the Work, excluding those notices that do not pertain to
|
||||
any part of the Derivative Works; and
|
||||
|
||||
If the Work includes a "NOTICE" text file as part of its distribution, then
|
||||
any Derivative Works that You distribute must include a readable copy of
|
||||
the attribution notices contained within such NOTICE file, excluding those
|
||||
notices that do not pertain to any part of the Derivative Works, in at
|
||||
least one of the following places: within a NOTICE text file distributed as
|
||||
part of the Derivative Works; within the Source form or documentation, if
|
||||
provided along with the Derivative Works; or, within a display generated by
|
||||
the Derivative Works, if and wherever such third-party notices normally
|
||||
appear. The contents of the NOTICE file are for informational purposes only
|
||||
and do not modify the License. You may add Your own attribution notices
|
||||
within Derivative Works that You distribute, alongside or as an addendum to
|
||||
the NOTICE text from the Work, provided that such additional attribution
|
||||
notices cannot be construed as modifying the License. You may add Your own
|
||||
copyright statement to Your modifications and may provide additional or
|
||||
different license terms and conditions for use, reproduction, or
|
||||
distribution of Your modifications, or for any such Derivative Works as a
|
||||
whole, provided Your use, reproduction, and distribution of the Work
|
||||
otherwise complies with the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, any
|
||||
Contribution intentionally submitted for inclusion in the Work by You to the
|
||||
Licensor shall be under the terms and conditions of this License, without any
|
||||
additional terms or conditions. Notwithstanding the above, nothing herein shall
|
||||
supersede or modify the terms of any separate license agreement you may have
|
||||
executed with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade names,
|
||||
trademarks, service marks, or product names of the Licensor, except as required
|
||||
for reasonable and customary use in describing the origin of the Work and
|
||||
reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
|
||||
writing, Licensor provides the Work (and each Contributor provides its
|
||||
Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied, including, without limitation, any warranties
|
||||
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any risks
|
||||
associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, whether in
|
||||
tort (including negligence), contract, or otherwise, unless required by
|
||||
applicable law (such as deliberate and grossly negligent acts) or agreed to in
|
||||
writing, shall any Contributor be liable to You for damages, including any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character arising as a result of this License or out of the use or inability to
|
||||
use the Work (including but not limited to damages for loss of goodwill, work
|
||||
stoppage, computer failure or malfunction, or any and all other commercial
|
||||
damages or losses), even if such Contributor has been advised of the
|
||||
possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing the Work or
|
||||
Derivative Works thereof, You may choose to offer, and charge a fee for,
|
||||
acceptance of support, warranty, indemnity, or other liability obligations
|
||||
and/or rights consistent with this License. However, in accepting such
|
||||
obligations, You may act only on Your own behalf and on Your sole
|
||||
responsibility, not on behalf of any other Contributor, and only if You agree
|
||||
to indemnify, defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason of your
|
||||
accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
|
@ -1,2 +0,0 @@
|
|||
*~
|
||||
*/*~
|
File diff suppressed because it is too large
Load diff
|
@ -1,83 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// This file contains all div_sqrt_top_mvp parameters
|
||||
// Authors : Lei Li (lile@iis.ee.ethz.ch)
|
||||
|
||||
package defs_div_sqrt_mvp;
|
||||
|
||||
// op command
|
||||
localparam C_RM = 3;
|
||||
localparam C_RM_NEAREST = 3'h0;
|
||||
localparam C_RM_TRUNC = 3'h1;
|
||||
localparam C_RM_PLUSINF = 3'h2;
|
||||
localparam C_RM_MINUSINF = 3'h3;
|
||||
localparam C_PC = 6; // Precision Control
|
||||
localparam C_FS = 2; // Format Selection
|
||||
localparam C_IUNC = 2; // Iteration Unit Number Control
|
||||
localparam Iteration_unit_num_S = 2'b10;
|
||||
|
||||
// FP64
|
||||
localparam C_OP_FP64 = 64;
|
||||
localparam C_MANT_FP64 = 52;
|
||||
localparam C_EXP_FP64 = 11;
|
||||
localparam C_BIAS_FP64 = 1023;
|
||||
localparam C_BIAS_AONE_FP64 = 11'h400;
|
||||
localparam C_HALF_BIAS_FP64 = 511;
|
||||
localparam C_EXP_ZERO_FP64 = 11'h000;
|
||||
localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm
|
||||
localparam C_EXP_INF_FP64 = 11'h7FF;
|
||||
localparam C_MANT_ZERO_FP64 = 52'h0;
|
||||
localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000;
|
||||
localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000;
|
||||
localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000;
|
||||
localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000;
|
||||
|
||||
// FP32
|
||||
localparam C_OP_FP32 = 32;
|
||||
localparam C_MANT_FP32 = 23;
|
||||
localparam C_EXP_FP32 = 8;
|
||||
localparam C_BIAS_FP32 = 127;
|
||||
localparam C_BIAS_AONE_FP32 = 8'h80;
|
||||
localparam C_HALF_BIAS_FP32 = 63;
|
||||
localparam C_EXP_ZERO_FP32 = 8'h00;
|
||||
localparam C_EXP_INF_FP32 = 8'hFF;
|
||||
localparam C_MANT_ZERO_FP32 = 23'h0;
|
||||
localparam C_PZERO_FP32 = 32'h0000_0000;
|
||||
localparam C_MZERO_FP32 = 32'h8000_0000;
|
||||
localparam C_QNAN_FP32 = 32'h7FC0_0000;
|
||||
|
||||
// FP16
|
||||
localparam C_OP_FP16 = 16;
|
||||
localparam C_MANT_FP16 = 10;
|
||||
localparam C_EXP_FP16 = 5;
|
||||
localparam C_BIAS_FP16 = 15;
|
||||
localparam C_BIAS_AONE_FP16 = 5'h10;
|
||||
localparam C_HALF_BIAS_FP16 = 7;
|
||||
localparam C_EXP_ZERO_FP16 = 5'h00;
|
||||
localparam C_EXP_INF_FP16 = 5'h1F;
|
||||
localparam C_MANT_ZERO_FP16 = 10'h0;
|
||||
localparam C_PZERO_FP16 = 16'h0000;
|
||||
localparam C_MZERO_FP16 = 16'h8000;
|
||||
localparam C_QNAN_FP16 = 16'h7E00;
|
||||
|
||||
// FP16alt
|
||||
localparam C_OP_FP16ALT = 16;
|
||||
localparam C_MANT_FP16ALT = 7;
|
||||
localparam C_EXP_FP16ALT = 8;
|
||||
localparam C_BIAS_FP16ALT = 127;
|
||||
localparam C_BIAS_AONE_FP16ALT = 8'h80;
|
||||
localparam C_HALF_BIAS_FP16ALT = 63;
|
||||
localparam C_EXP_ZERO_FP16ALT = 8'h00;
|
||||
localparam C_EXP_INF_FP16ALT = 8'hFF;
|
||||
localparam C_MANT_ZERO_FP16ALT = 7'h0;
|
||||
localparam C_QNAN_FP16ALT = 16'h7FC0;
|
||||
|
||||
endpackage : defs_div_sqrt_mvp
|
|
@ -1,232 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li -- lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 20/04/2018 //
|
||||
// Design Name: FPU //
|
||||
// Module Name: div_sqrt_mvp_wrapper.sv //
|
||||
// Project Name: The shared divisor and square root //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: The wrapper of div_sqrt_top_mvp //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import defs_div_sqrt_mvp::*;
|
||||
|
||||
module div_sqrt_mvp_wrapper
|
||||
#(
|
||||
parameter PrePipeline_depth_S = 0, // If you want to add a flip/flop stage before preprocess, set it to 1.
|
||||
parameter PostPipeline_depth_S = 2 // The output delay stages
|
||||
)
|
||||
(//Input
|
||||
input logic Clk_CI,
|
||||
input logic Rst_RBI,
|
||||
input logic Div_start_SI,
|
||||
input logic Sqrt_start_SI,
|
||||
|
||||
//Input Operands
|
||||
input logic [C_OP_FP64-1:0] Operand_a_DI,
|
||||
input logic [C_OP_FP64-1:0] Operand_b_DI,
|
||||
|
||||
// Input Control
|
||||
input logic [C_RM-1:0] RM_SI, //Rounding Mode
|
||||
input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control
|
||||
input logic [C_FS-1:0] Format_sel_SI, // Format Selection,
|
||||
input logic Kill_SI,
|
||||
|
||||
//Output Result
|
||||
output logic [C_OP_FP64-1:0] Result_DO,
|
||||
|
||||
//Output-Flags
|
||||
output logic [4:0] Fflags_SO,
|
||||
output logic Ready_SO,
|
||||
output logic Done_SO
|
||||
);
|
||||
|
||||
|
||||
logic Div_start_S_S,Sqrt_start_S_S;
|
||||
logic [C_OP_FP64-1:0] Operand_a_S_D;
|
||||
logic [C_OP_FP64-1:0] Operand_b_S_D;
|
||||
|
||||
// Input Control
|
||||
logic [C_RM-1:0] RM_S_S; //Rounding Mode
|
||||
logic [C_PC-1:0] Precision_ctl_S_S; // Precision Control
|
||||
logic [C_FS-1:0] Format_sel_S_S; // Format Selection,
|
||||
logic Kill_S_S;
|
||||
|
||||
|
||||
logic [C_OP_FP64-1:0] Result_D;
|
||||
logic Ready_S;
|
||||
logic Done_S;
|
||||
logic [4:0] Fflags_S;
|
||||
|
||||
|
||||
generate
|
||||
if(PrePipeline_depth_S==1)
|
||||
begin
|
||||
|
||||
div_sqrt_top_mvp div_top_U0 //for RTL
|
||||
|
||||
(//Input
|
||||
.Clk_CI (Clk_CI),
|
||||
.Rst_RBI (Rst_RBI),
|
||||
.Div_start_SI (Div_start_S_S),
|
||||
.Sqrt_start_SI (Sqrt_start_S_S),
|
||||
//Input Operands
|
||||
.Operand_a_DI (Operand_a_S_D),
|
||||
.Operand_b_DI (Operand_b_S_D),
|
||||
.RM_SI (RM_S_S), //Rounding Mode
|
||||
.Precision_ctl_SI (Precision_ctl_S_S),
|
||||
.Format_sel_SI (Format_sel_S_S),
|
||||
.Kill_SI (Kill_S_S),
|
||||
.Result_DO (Result_D),
|
||||
.Fflags_SO (Fflags_S),
|
||||
.Ready_SO (Ready_S),
|
||||
.Done_SO (Done_S)
|
||||
);
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Div_start_S_S<='0;
|
||||
Sqrt_start_S_S<=1'b0;
|
||||
Operand_a_S_D<='0;
|
||||
Operand_b_S_D<='0;
|
||||
RM_S_S <=1'b0;
|
||||
Precision_ctl_S_S<='0;
|
||||
Format_sel_S_S<='0;
|
||||
Kill_S_S<='0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Div_start_S_S<=Div_start_SI;
|
||||
Sqrt_start_S_S<=Sqrt_start_SI;
|
||||
Operand_a_S_D<=Operand_a_DI;
|
||||
Operand_b_S_D<=Operand_b_DI;
|
||||
RM_S_S <=RM_SI;
|
||||
Precision_ctl_S_S<=Precision_ctl_SI;
|
||||
Format_sel_S_S<=Format_sel_SI;
|
||||
Kill_S_S<=Kill_SI;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
else
|
||||
begin
|
||||
div_sqrt_top_mvp div_top_U0 //for RTL
|
||||
(//Input
|
||||
.Clk_CI (Clk_CI),
|
||||
.Rst_RBI (Rst_RBI),
|
||||
.Div_start_SI (Div_start_SI),
|
||||
.Sqrt_start_SI (Sqrt_start_SI),
|
||||
//Input Operands
|
||||
.Operand_a_DI (Operand_a_DI),
|
||||
.Operand_b_DI (Operand_b_DI),
|
||||
.RM_SI (RM_SI), //Rounding Mode
|
||||
.Precision_ctl_SI (Precision_ctl_SI),
|
||||
.Format_sel_SI (Format_sel_SI),
|
||||
.Kill_SI (Kill_SI),
|
||||
.Result_DO (Result_D),
|
||||
.Fflags_SO (Fflags_S),
|
||||
.Ready_SO (Ready_S),
|
||||
.Done_SO (Done_S)
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// First Stage of Outputs
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
logic [C_OP_FP64-1:0] Result_dly_S_D;
|
||||
logic Ready_dly_S_S;
|
||||
logic Done_dly_S_S;
|
||||
logic [4:0] Fflags_dly_S_S;
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Result_dly_S_D<='0;
|
||||
Ready_dly_S_S<=1'b0;
|
||||
Done_dly_S_S<=1'b0;
|
||||
Fflags_dly_S_S<=1'b0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Result_dly_S_D<=Result_D;
|
||||
Ready_dly_S_S<=Ready_S;
|
||||
Done_dly_S_S<=Done_S;
|
||||
Fflags_dly_S_S<=Fflags_S;
|
||||
end
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Second Stage of Outputs
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [C_OP_FP64-1:0] Result_dly_D_D;
|
||||
logic Ready_dly_D_S;
|
||||
logic Done_dly_D_S;
|
||||
logic [4:0] Fflags_dly_D_S;
|
||||
generate
|
||||
if(PostPipeline_depth_S==2)
|
||||
begin
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Result_dly_D_D<='0;
|
||||
Ready_dly_D_S<=1'b0;
|
||||
Done_dly_D_S<=1'b0;
|
||||
Fflags_dly_D_S<=1'b0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Result_dly_D_D<=Result_dly_S_D;
|
||||
Ready_dly_D_S<=Ready_dly_S_S;
|
||||
Done_dly_D_S<=Done_dly_S_S;
|
||||
Fflags_dly_D_S<=Fflags_dly_S_S;
|
||||
end
|
||||
end
|
||||
assign Result_DO = Result_dly_D_D;
|
||||
assign Ready_SO = Ready_dly_D_S;
|
||||
assign Done_SO = Done_dly_D_S;
|
||||
assign Fflags_SO=Fflags_dly_D_S;
|
||||
end
|
||||
|
||||
else
|
||||
begin
|
||||
assign Result_DO = Result_dly_S_D;
|
||||
assign Ready_SO = Ready_dly_S_S;
|
||||
assign Done_SO = Done_dly_S_S;
|
||||
assign Fflags_SO = Fflags_dly_S_S;
|
||||
end
|
||||
|
||||
endgenerate
|
||||
|
||||
endmodule //
|
|
@ -1,180 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li -- lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 03/03/2018 //
|
||||
// Design Name: div_sqrt_top_mvp //
|
||||
// Module Name: div_sqrt_top_mvp.sv //
|
||||
// Project Name: The shared divisor and square root //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: The top of div and sqrt //
|
||||
// //
|
||||
// //
|
||||
// Revision Date: 12/04/2018 //
|
||||
// Lei Li //
|
||||
// To address some requirements by Stefan and add low power //
|
||||
// control for special cases //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import defs_div_sqrt_mvp::*;
|
||||
|
||||
module div_sqrt_top_mvp
|
||||
|
||||
(//Input
|
||||
input logic Clk_CI,
|
||||
input logic Rst_RBI,
|
||||
input logic Div_start_SI,
|
||||
input logic Sqrt_start_SI,
|
||||
|
||||
//Input Operands
|
||||
input logic [C_OP_FP64-1:0] Operand_a_DI,
|
||||
input logic [C_OP_FP64-1:0] Operand_b_DI,
|
||||
|
||||
// Input Control
|
||||
input logic [C_RM-1:0] RM_SI, //Rounding Mode
|
||||
input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control
|
||||
input logic [C_FS-1:0] Format_sel_SI, // Format Selection,
|
||||
input logic Kill_SI,
|
||||
|
||||
//Output Result
|
||||
output logic [C_OP_FP64-1:0] Result_DO,
|
||||
|
||||
//Output-Flags
|
||||
output logic [4:0] Fflags_SO,
|
||||
output logic Ready_SO,
|
||||
output logic Done_SO
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//Operand components
|
||||
logic [C_EXP_FP64:0] Exp_a_D;
|
||||
logic [C_EXP_FP64:0] Exp_b_D;
|
||||
logic [C_MANT_FP64:0] Mant_a_D;
|
||||
logic [C_MANT_FP64:0] Mant_b_D;
|
||||
|
||||
logic [C_EXP_FP64+1:0] Exp_z_D;
|
||||
logic [C_MANT_FP64+4:0] Mant_z_D;
|
||||
logic Sign_z_D;
|
||||
logic Start_S;
|
||||
logic [C_RM-1:0] RM_dly_S;
|
||||
logic Div_enable_S;
|
||||
logic Sqrt_enable_S;
|
||||
logic Inf_a_S;
|
||||
logic Inf_b_S;
|
||||
logic Zero_a_S;
|
||||
logic Zero_b_S;
|
||||
logic NaN_a_S;
|
||||
logic NaN_b_S;
|
||||
logic SNaN_S;
|
||||
logic Special_case_SB,Special_case_dly_SB;
|
||||
|
||||
logic Full_precision_S;
|
||||
logic FP32_S;
|
||||
logic FP64_S;
|
||||
logic FP16_S;
|
||||
logic FP16ALT_S;
|
||||
|
||||
|
||||
preprocess_mvp preprocess_U0
|
||||
(
|
||||
.Clk_CI (Clk_CI ),
|
||||
.Rst_RBI (Rst_RBI ),
|
||||
.Div_start_SI (Div_start_SI ),
|
||||
.Sqrt_start_SI (Sqrt_start_SI ),
|
||||
.Ready_SI (Ready_SO ),
|
||||
.Operand_a_DI (Operand_a_DI ),
|
||||
.Operand_b_DI (Operand_b_DI ),
|
||||
.RM_SI (RM_SI ),
|
||||
.Format_sel_SI (Format_sel_SI ),
|
||||
.Start_SO (Start_S ),
|
||||
.Exp_a_DO_norm (Exp_a_D ),
|
||||
.Exp_b_DO_norm (Exp_b_D ),
|
||||
.Mant_a_DO_norm (Mant_a_D ),
|
||||
.Mant_b_DO_norm (Mant_b_D ),
|
||||
.RM_dly_SO (RM_dly_S ),
|
||||
.Sign_z_DO (Sign_z_D ),
|
||||
.Inf_a_SO (Inf_a_S ),
|
||||
.Inf_b_SO (Inf_b_S ),
|
||||
.Zero_a_SO (Zero_a_S ),
|
||||
.Zero_b_SO (Zero_b_S ),
|
||||
.NaN_a_SO (NaN_a_S ),
|
||||
.NaN_b_SO (NaN_b_S ),
|
||||
.SNaN_SO (SNaN_S ),
|
||||
.Special_case_SBO (Special_case_SB ),
|
||||
.Special_case_dly_SBO (Special_case_dly_SB)
|
||||
);
|
||||
|
||||
nrbd_nrsc_mvp nrbd_nrsc_U0
|
||||
(
|
||||
.Clk_CI (Clk_CI ),
|
||||
.Rst_RBI (Rst_RBI ),
|
||||
.Div_start_SI (Div_start_SI ) ,
|
||||
.Sqrt_start_SI (Sqrt_start_SI ),
|
||||
.Start_SI (Start_S ),
|
||||
.Kill_SI (Kill_SI ),
|
||||
.Special_case_SBI (Special_case_SB ),
|
||||
.Special_case_dly_SBI (Special_case_dly_SB),
|
||||
.Div_enable_SO (Div_enable_S ),
|
||||
.Sqrt_enable_SO (Sqrt_enable_S ),
|
||||
.Precision_ctl_SI (Precision_ctl_SI ),
|
||||
.Format_sel_SI (Format_sel_SI ),
|
||||
.Exp_a_DI (Exp_a_D ),
|
||||
.Exp_b_DI (Exp_b_D ),
|
||||
.Mant_a_DI (Mant_a_D ),
|
||||
.Mant_b_DI (Mant_b_D ),
|
||||
.Full_precision_SO (Full_precision_S ),
|
||||
.FP32_SO (FP32_S ),
|
||||
.FP64_SO (FP64_S ),
|
||||
.FP16_SO (FP16_S ),
|
||||
.FP16ALT_SO (FP16ALT_S ),
|
||||
.Ready_SO (Ready_SO ),
|
||||
.Done_SO (Done_SO ),
|
||||
.Exp_z_DO (Exp_z_D ),
|
||||
.Mant_z_DO (Mant_z_D )
|
||||
);
|
||||
|
||||
|
||||
norm_div_sqrt_mvp fpu_norm_U0
|
||||
(
|
||||
.Mant_in_DI (Mant_z_D ),
|
||||
.Exp_in_DI (Exp_z_D ),
|
||||
.Sign_in_DI (Sign_z_D ),
|
||||
.Div_enable_SI (Div_enable_S ),
|
||||
.Sqrt_enable_SI (Sqrt_enable_S ),
|
||||
.Inf_a_SI (Inf_a_S ),
|
||||
.Inf_b_SI (Inf_b_S ),
|
||||
.Zero_a_SI (Zero_a_S ),
|
||||
.Zero_b_SI (Zero_b_S ),
|
||||
.NaN_a_SI (NaN_a_S ),
|
||||
.NaN_b_SI (NaN_b_S ),
|
||||
.SNaN_SI (SNaN_S ),
|
||||
.RM_SI (RM_dly_S ),
|
||||
.Full_precision_SI (Full_precision_S ),
|
||||
.FP32_SI (FP32_S ),
|
||||
.FP64_SI (FP64_S ),
|
||||
.FP16_SI (FP16_S ),
|
||||
.FP16ALT_SI (FP16ALT_S ),
|
||||
.Result_DO (Result_DO ),
|
||||
.Fflags_SO (Fflags_SO ) //{NV,DZ,OF,UF,NX}
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -1,61 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 12/01/2017 //
|
||||
// Design Name: FPU //
|
||||
// Module Name: iteration_div_sqrt_mvp //
|
||||
// Project Name: Private FPU //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: iteration unit for div and sqrt //
|
||||
// //
|
||||
// //
|
||||
// Revision: 03/14/2018 //
|
||||
// For div_sqrt_mvp //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module iteration_div_sqrt_mvp
|
||||
#(
|
||||
parameter WIDTH=25
|
||||
)
|
||||
(//Input
|
||||
|
||||
input logic [WIDTH-1:0] A_DI,
|
||||
input logic [WIDTH-1:0] B_DI,
|
||||
input logic Div_enable_SI,
|
||||
input logic Div_start_dly_SI,
|
||||
input logic Sqrt_enable_SI,
|
||||
input logic [1:0] D_DI,
|
||||
|
||||
output logic [1:0] D_DO,
|
||||
output logic [WIDTH-1:0] Sum_DO,
|
||||
output logic Carry_out_DO
|
||||
);
|
||||
|
||||
logic D_carry_D;
|
||||
logic Sqrt_cin_D;
|
||||
logic Cin_D;
|
||||
|
||||
assign D_DO[0]=~D_DI[0];
|
||||
assign D_DO[1]=~(D_DI[1] ^ D_DI[0]);
|
||||
assign D_carry_D=D_DI[1] | D_DI[0];
|
||||
assign Sqrt_cin_D=Sqrt_enable_SI&&D_carry_D;
|
||||
assign Cin_D=Div_enable_SI?1'b0:Sqrt_cin_D;
|
||||
assign {Carry_out_DO,Sum_DO}=A_DI+B_DI+Cin_D;
|
||||
|
||||
endmodule
|
|
@ -1,470 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 09/03/2018 //
|
||||
// Design Name: FPU //
|
||||
// Module Name: norm_div_sqrt_mvp.sv //
|
||||
// Project Name: //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: Floating point Normalizer/Rounding unit //
|
||||
// Since this module is designed as combinational logic, //
|
||||
// arbitrary register stages can be added for different //
|
||||
// frequencies in the wrapper module. //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Revision Date: 12/04/2018 //
|
||||
// Lei Li //
|
||||
// To address some requirements by Stefan //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import defs_div_sqrt_mvp::*;
|
||||
|
||||
module norm_div_sqrt_mvp
|
||||
(//Inputs
|
||||
input logic [C_MANT_FP64+4:0] Mant_in_DI, // Include the needed 4-bit for rounding and hidden bit
|
||||
input logic signed [C_EXP_FP64+1:0] Exp_in_DI,
|
||||
input logic Sign_in_DI,
|
||||
input logic Div_enable_SI,
|
||||
input logic Sqrt_enable_SI,
|
||||
input logic Inf_a_SI,
|
||||
input logic Inf_b_SI,
|
||||
input logic Zero_a_SI,
|
||||
input logic Zero_b_SI,
|
||||
input logic NaN_a_SI,
|
||||
input logic NaN_b_SI,
|
||||
input logic SNaN_SI,
|
||||
input logic [C_RM-1:0] RM_SI,
|
||||
input logic Full_precision_SI,
|
||||
input logic FP32_SI,
|
||||
input logic FP64_SI,
|
||||
input logic FP16_SI,
|
||||
input logic FP16ALT_SI,
|
||||
//Outputs
|
||||
output logic [C_EXP_FP64+C_MANT_FP64:0] Result_DO,
|
||||
output logic [4:0] Fflags_SO //{NV,DZ,OF,UF,NX}
|
||||
);
|
||||
|
||||
|
||||
logic Sign_res_D;
|
||||
|
||||
logic NV_OP_S;
|
||||
logic Exp_OF_S;
|
||||
logic Exp_UF_S;
|
||||
logic Div_Zero_S;
|
||||
logic In_Exact_S;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
logic [C_MANT_FP64:0] Mant_res_norm_D;
|
||||
logic [C_EXP_FP64-1:0] Exp_res_norm_D;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Right shift operations for negative exponents //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [C_EXP_FP64+1:0] Exp_Max_RS_FP64_D;
|
||||
logic [C_EXP_FP32+1:0] Exp_Max_RS_FP32_D;
|
||||
logic [C_EXP_FP16+1:0] Exp_Max_RS_FP16_D;
|
||||
logic [C_EXP_FP16ALT+1:0] Exp_Max_RS_FP16ALT_D;
|
||||
//
|
||||
assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative
|
||||
assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative
|
||||
assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative
|
||||
assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative
|
||||
logic [C_EXP_FP64+1:0] Num_RS_D;
|
||||
assign Num_RS_D=~Exp_in_DI+1+1; // How many right shifts(RS) are needed to generate a denormal number? >> is needed only when Exp_in_DI is negative
|
||||
logic [C_MANT_FP64:0] Mant_RS_D;
|
||||
logic [C_MANT_FP64+4:0] Mant_forsticky_D;
|
||||
assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); //
|
||||
//
|
||||
logic [C_EXP_FP64+1:0] Exp_subOne_D;
|
||||
assign Exp_subOne_D = Exp_in_DI -1;
|
||||
|
||||
//normalization
|
||||
logic [1:0] Mant_lower_D;
|
||||
logic Mant_sticky_bit_D;
|
||||
logic [C_MANT_FP64+4:0] Mant_forround_D;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
|
||||
if(NaN_a_SI) // if a is NaN, return NaN
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = SNaN_SI;
|
||||
end
|
||||
|
||||
else if(NaN_b_SI) //if b is NaN, return NaN
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = SNaN_SI;
|
||||
end
|
||||
|
||||
else if(Inf_a_SI)
|
||||
begin
|
||||
if(Div_enable_SI&&Inf_b_SI) //Inf/Inf, return NaN
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = 1'b1;
|
||||
end
|
||||
else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf)
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = 1'b1;
|
||||
end else begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b1;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D= '0;
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
else if(Div_enable_SI&&Inf_b_SI)
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b1;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D= '0;
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else if(Zero_a_SI)
|
||||
begin
|
||||
if(Div_enable_SI&&Zero_b_SI)
|
||||
begin
|
||||
Div_Zero_S=1'b1;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = 1'b1;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D='0;
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
else if(Div_enable_SI&&(Zero_b_SI)) //div Zero
|
||||
begin
|
||||
Div_Zero_S=1'b1;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D='0;
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else if(Sign_in_DI&&Sqrt_enable_SI) //sqrt(-a)
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D={1'b0,C_MANT_NAN_FP64};
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=1'b0;
|
||||
NV_OP_S = 1'b1;
|
||||
end
|
||||
|
||||
else if((Exp_in_DI[C_EXP_FP64:0]=='0))
|
||||
begin
|
||||
if(Mant_in_DI!='0) //Exp=0, Mant!=0, it is denormal
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b1;
|
||||
Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]};
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} };
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
else // Zero
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D='0;
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4])) //denormal
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b1;
|
||||
Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4];
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else if(Exp_in_DI[C_EXP_FP64+1]) //minus //consider format
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b1;
|
||||
Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]};
|
||||
Exp_res_norm_D='0;
|
||||
Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]}; //??
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) ) //OF
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b1;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D='0;
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255
|
||||
begin
|
||||
if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
|
||||
Exp_res_norm_D=Exp_subOne_D;
|
||||
Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
else if(Mant_in_DI!='0) //NaN
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b1;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D= '0;
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
else //infinity
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b1;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D= '0;
|
||||
Exp_res_norm_D='1;
|
||||
Mant_forround_D='0;
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
else if(Mant_in_DI[C_MANT_FP64+4]) //normal numbers with 1.XXX
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4];
|
||||
Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0];
|
||||
Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}};
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
else //normal numbers with 0.1XX
|
||||
begin
|
||||
Div_Zero_S=1'b0;
|
||||
Exp_OF_S=1'b0;
|
||||
Exp_UF_S=1'b0;
|
||||
Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3];
|
||||
Exp_res_norm_D=Exp_subOne_D;
|
||||
Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}};
|
||||
Sign_res_D=Sign_in_DI;
|
||||
NV_OP_S = 1'b0;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding enabled only for full precision (Full_precision_SI==1'b1) //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic [C_MANT_FP64:0] Mant_upper_D;
|
||||
logic [C_MANT_FP64+1:0] Mant_upperRounded_D;
|
||||
logic Mant_roundUp_S;
|
||||
logic Mant_rounded_S;
|
||||
|
||||
always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D
|
||||
begin
|
||||
if(FP32_SI)
|
||||
begin
|
||||
Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} };
|
||||
Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2];
|
||||
Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0];
|
||||
end
|
||||
else if(FP64_SI)
|
||||
begin
|
||||
Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0];
|
||||
Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3];
|
||||
Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0];
|
||||
end
|
||||
else if(FP16_SI)
|
||||
begin
|
||||
Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} };
|
||||
Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2];
|
||||
Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30];
|
||||
end
|
||||
else //FP16ALT
|
||||
begin
|
||||
Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} };
|
||||
Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2];
|
||||
Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30];
|
||||
end
|
||||
end
|
||||
|
||||
assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D;
|
||||
|
||||
|
||||
|
||||
|
||||
always_comb //determine whether to round up or not
|
||||
begin
|
||||
Mant_roundUp_S = 1'b0;
|
||||
case (RM_SI)
|
||||
C_RM_NEAREST :
|
||||
Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | (FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) );
|
||||
C_RM_TRUNC :
|
||||
Mant_roundUp_S = 0;
|
||||
C_RM_PLUSINF :
|
||||
Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI;
|
||||
C_RM_MINUSINF:
|
||||
Mant_roundUp_S = Mant_rounded_S & Sign_in_DI;
|
||||
default :
|
||||
Mant_roundUp_S = 0;
|
||||
endcase // case (RM_SI)
|
||||
end // always_comb begin
|
||||
|
||||
logic Mant_renorm_S;
|
||||
logic [C_MANT_FP64:0] Mant_roundUp_Vector_S; // for all the formats
|
||||
|
||||
assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)};
|
||||
|
||||
|
||||
assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S;
|
||||
assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1];
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Renormalization for Rounding //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
logic [C_MANT_FP64-1:0] Mant_res_round_D;
|
||||
logic [C_EXP_FP64-1:0] Exp_res_round_D;
|
||||
|
||||
|
||||
assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit
|
||||
assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Output Assignments //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
logic [C_MANT_FP64-1:0] Mant_before_format_ctl_D;
|
||||
logic [C_EXP_FP64-1:0] Exp_before_format_ctl_D;
|
||||
assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D;
|
||||
assign Exp_before_format_ctl_D = Full_precision_SI ? Exp_res_round_D : Exp_res_norm_D;
|
||||
|
||||
always_comb //NaN Boxing
|
||||
begin //
|
||||
if(FP32_SI)
|
||||
begin
|
||||
Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]};
|
||||
end
|
||||
else if(FP64_SI)
|
||||
begin
|
||||
Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]};
|
||||
end
|
||||
else if(FP16_SI)
|
||||
begin
|
||||
Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]};
|
||||
end
|
||||
else
|
||||
begin
|
||||
Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]};
|
||||
end
|
||||
end
|
||||
|
||||
assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S;
|
||||
assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX}
|
||||
|
||||
endmodule // norm_div_sqrt_mvp
|
|
@ -1,104 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 10/04/2018 //
|
||||
// Design Name: FPU //
|
||||
// Module Name: nrbd_nrsc_mvp.sv //
|
||||
// Project Name: Private FPU //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: non-restoring binary divider / square root //
|
||||
// //
|
||||
// Revision Date: 12/04/2018 //
|
||||
// Lei Li //
|
||||
// To address some requirements by Stefan and add low power //
|
||||
// control for special cases //
|
||||
// //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import defs_div_sqrt_mvp::*;
|
||||
|
||||
module nrbd_nrsc_mvp
|
||||
|
||||
(//Input
|
||||
input logic Clk_CI,
|
||||
input logic Rst_RBI,
|
||||
input logic Div_start_SI,
|
||||
input logic Sqrt_start_SI,
|
||||
input logic Start_SI,
|
||||
input logic Kill_SI,
|
||||
input logic Special_case_SBI,
|
||||
input logic Special_case_dly_SBI,
|
||||
input logic [C_PC-1:0] Precision_ctl_SI,
|
||||
input logic [1:0] Format_sel_SI,
|
||||
input logic [C_MANT_FP64:0] Mant_a_DI,
|
||||
input logic [C_MANT_FP64:0] Mant_b_DI,
|
||||
input logic [C_EXP_FP64:0] Exp_a_DI,
|
||||
input logic [C_EXP_FP64:0] Exp_b_DI,
|
||||
//output
|
||||
output logic Div_enable_SO,
|
||||
output logic Sqrt_enable_SO,
|
||||
|
||||
output logic Full_precision_SO,
|
||||
output logic FP32_SO,
|
||||
output logic FP64_SO,
|
||||
output logic FP16_SO,
|
||||
output logic FP16ALT_SO,
|
||||
output logic Ready_SO,
|
||||
output logic Done_SO,
|
||||
output logic [C_MANT_FP64+4:0] Mant_z_DO,
|
||||
output logic [C_EXP_FP64+1:0] Exp_z_DO
|
||||
);
|
||||
|
||||
|
||||
logic Div_start_dly_S,Sqrt_start_dly_S;
|
||||
|
||||
|
||||
control_mvp control_U0
|
||||
( .Clk_CI (Clk_CI ),
|
||||
.Rst_RBI (Rst_RBI ),
|
||||
.Div_start_SI (Div_start_SI ),
|
||||
.Sqrt_start_SI (Sqrt_start_SI ),
|
||||
.Start_SI (Start_SI ),
|
||||
.Kill_SI (Kill_SI ),
|
||||
.Special_case_SBI (Special_case_SBI ),
|
||||
.Special_case_dly_SBI (Special_case_dly_SBI ),
|
||||
.Precision_ctl_SI (Precision_ctl_SI ),
|
||||
.Format_sel_SI (Format_sel_SI ),
|
||||
.Numerator_DI (Mant_a_DI ),
|
||||
.Exp_num_DI (Exp_a_DI ),
|
||||
.Denominator_DI (Mant_b_DI ),
|
||||
.Exp_den_DI (Exp_b_DI ),
|
||||
.Div_start_dly_SO (Div_start_dly_S ),
|
||||
.Sqrt_start_dly_SO (Sqrt_start_dly_S ),
|
||||
.Div_enable_SO (Div_enable_SO ),
|
||||
.Sqrt_enable_SO (Sqrt_enable_SO ),
|
||||
.Full_precision_SO (Full_precision_SO ),
|
||||
.FP32_SO (FP32_SO ),
|
||||
.FP64_SO (FP64_SO ),
|
||||
.FP16_SO (FP16_SO ),
|
||||
.FP16ALT_SO (FP16ALT_SO ),
|
||||
.Ready_SO (Ready_SO ),
|
||||
.Done_SO (Done_SO ),
|
||||
.Mant_result_prenorm_DO (Mant_z_DO ),
|
||||
.Exp_result_prenorm_DO (Exp_z_DO )
|
||||
);
|
||||
|
||||
|
||||
|
||||
endmodule
|
|
@ -1,425 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the “License”); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Company: IIS @ ETHZ - Federal Institute of Technology //
|
||||
// //
|
||||
// Engineers: Lei Li lile@iis.ee.ethz.ch //
|
||||
// //
|
||||
// Additional contributions by: //
|
||||
// //
|
||||
// //
|
||||
// //
|
||||
// Create Date: 01/03/2018 //
|
||||
// Design Name: FPU //
|
||||
// Module Name: preprocess_mvp.sv //
|
||||
// Project Name: Private FPU //
|
||||
// Language: SystemVerilog //
|
||||
// //
|
||||
// Description: decode and data preparation //
|
||||
// //
|
||||
// Revision Date: 12/04/2018 //
|
||||
// Lei Li //
|
||||
// To address some requirements by Stefan and add low power //
|
||||
// control for special cases //
|
||||
// //
|
||||
// //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import defs_div_sqrt_mvp::*;
|
||||
|
||||
module preprocess_mvp
|
||||
(
|
||||
input logic Clk_CI,
|
||||
input logic Rst_RBI,
|
||||
input logic Div_start_SI,
|
||||
input logic Sqrt_start_SI,
|
||||
input logic Ready_SI,
|
||||
//Input Operands
|
||||
input logic [C_OP_FP64-1:0] Operand_a_DI,
|
||||
input logic [C_OP_FP64-1:0] Operand_b_DI,
|
||||
input logic [C_RM-1:0] RM_SI, //Rounding Mode
|
||||
input logic [C_FS-1:0] Format_sel_SI, // Format Selection
|
||||
|
||||
// to control
|
||||
output logic Start_SO,
|
||||
output logic [C_EXP_FP64:0] Exp_a_DO_norm,
|
||||
output logic [C_EXP_FP64:0] Exp_b_DO_norm,
|
||||
output logic [C_MANT_FP64:0] Mant_a_DO_norm,
|
||||
output logic [C_MANT_FP64:0] Mant_b_DO_norm,
|
||||
|
||||
output logic [C_RM-1:0] RM_dly_SO,
|
||||
|
||||
output logic Sign_z_DO,
|
||||
output logic Inf_a_SO,
|
||||
output logic Inf_b_SO,
|
||||
output logic Zero_a_SO,
|
||||
output logic Zero_b_SO,
|
||||
output logic NaN_a_SO,
|
||||
output logic NaN_b_SO,
|
||||
output logic SNaN_SO,
|
||||
output logic Special_case_SBO,
|
||||
output logic Special_case_dly_SBO
|
||||
);
|
||||
|
||||
//Hidden Bits
|
||||
logic Hb_a_D;
|
||||
logic Hb_b_D;
|
||||
|
||||
logic [C_EXP_FP64-1:0] Exp_a_D;
|
||||
logic [C_EXP_FP64-1:0] Exp_b_D;
|
||||
logic [C_MANT_FP64-1:0] Mant_a_NonH_D;
|
||||
logic [C_MANT_FP64-1:0] Mant_b_NonH_D;
|
||||
logic [C_MANT_FP64:0] Mant_a_D;
|
||||
logic [C_MANT_FP64:0] Mant_b_D;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Disassemble operands
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
logic Sign_a_D,Sign_b_D;
|
||||
logic Start_S;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
case(Format_sel_SI)
|
||||
2'b00:
|
||||
begin
|
||||
Sign_a_D = Operand_a_DI[C_OP_FP32-1];
|
||||
Sign_b_D = Operand_b_DI[C_OP_FP32-1];
|
||||
Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]};
|
||||
Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]};
|
||||
Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0};
|
||||
Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0};
|
||||
end
|
||||
2'b01:
|
||||
begin
|
||||
Sign_a_D = Operand_a_DI[C_OP_FP64-1];
|
||||
Sign_b_D = Operand_b_DI[C_OP_FP64-1];
|
||||
Exp_a_D = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64];
|
||||
Exp_b_D = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64];
|
||||
Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0];
|
||||
Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0];
|
||||
end
|
||||
2'b10:
|
||||
begin
|
||||
Sign_a_D = Operand_a_DI[C_OP_FP16-1];
|
||||
Sign_b_D = Operand_b_DI[C_OP_FP16-1];
|
||||
Exp_a_D = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]};
|
||||
Exp_b_D = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]};
|
||||
Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0};
|
||||
Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0};
|
||||
end
|
||||
2'b11:
|
||||
begin
|
||||
Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1];
|
||||
Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1];
|
||||
Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
|
||||
Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]};
|
||||
Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0};
|
||||
Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D};
|
||||
assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D};
|
||||
|
||||
assign Hb_a_D = | Exp_a_D; // hidden bit
|
||||
assign Hb_b_D = | Exp_b_D; // hidden bit
|
||||
|
||||
assign Start_S= Div_start_SI | Sqrt_start_SI;
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// preliminary checks for infinite/zero/NaN operands //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic Mant_a_prenorm_zero_S;
|
||||
logic Mant_b_prenorm_zero_S;
|
||||
|
||||
logic Exp_a_prenorm_zero_S;
|
||||
logic Exp_b_prenorm_zero_S;
|
||||
assign Exp_a_prenorm_zero_S = ~Hb_a_D;
|
||||
assign Exp_b_prenorm_zero_S = ~Hb_b_D;
|
||||
|
||||
logic Exp_a_prenorm_Inf_NaN_S;
|
||||
logic Exp_b_prenorm_Inf_NaN_S;
|
||||
|
||||
logic Mant_a_prenorm_QNaN_S;
|
||||
logic Mant_a_prenorm_SNaN_S;
|
||||
logic Mant_b_prenorm_QNaN_S;
|
||||
logic Mant_b_prenorm_SNaN_S;
|
||||
|
||||
assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0]));
|
||||
assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0]));
|
||||
assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0]));
|
||||
assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0]));
|
||||
|
||||
always_comb
|
||||
begin
|
||||
case(Format_sel_SI)
|
||||
2'b00:
|
||||
begin
|
||||
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
|
||||
Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32);
|
||||
Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
|
||||
Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32);
|
||||
end
|
||||
2'b01:
|
||||
begin
|
||||
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
|
||||
Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64);
|
||||
Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
|
||||
Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64);
|
||||
end
|
||||
2'b10:
|
||||
begin
|
||||
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
|
||||
Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16);
|
||||
Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
|
||||
Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16);
|
||||
end
|
||||
2'b11:
|
||||
begin
|
||||
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
|
||||
Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT);
|
||||
Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
|
||||
Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
logic Zero_a_SN,Zero_a_SP;
|
||||
logic Zero_b_SN,Zero_b_SP;
|
||||
logic Inf_a_SN,Inf_a_SP;
|
||||
logic Inf_b_SN,Inf_b_SP;
|
||||
logic NaN_a_SN,NaN_a_SP;
|
||||
logic NaN_b_SN,NaN_b_SP;
|
||||
logic SNaN_SN,SNaN_SP;
|
||||
|
||||
assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP;
|
||||
assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP;
|
||||
assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP;
|
||||
assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP;
|
||||
assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP;
|
||||
assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP;
|
||||
assign SNaN_SN = (Start_S&&Ready_SI) ? ((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP;
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Zero_a_SP <='0;
|
||||
Zero_b_SP <='0;
|
||||
Inf_a_SP <='0;
|
||||
Inf_b_SP <='0;
|
||||
NaN_a_SP <='0;
|
||||
NaN_b_SP <='0;
|
||||
SNaN_SP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Inf_a_SP <=Inf_a_SN;
|
||||
Inf_b_SP <=Inf_b_SN;
|
||||
Zero_a_SP <=Zero_a_SN;
|
||||
Zero_b_SP <=Zero_b_SN;
|
||||
NaN_a_SP <=NaN_a_SN;
|
||||
NaN_b_SP <=NaN_b_SN;
|
||||
SNaN_SP <= SNaN_SN;
|
||||
end
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Low power control
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN | Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI);
|
||||
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Special_case_dly_SBO <= '0;
|
||||
end
|
||||
else if((Start_S&&Ready_SI))
|
||||
begin
|
||||
Special_case_dly_SBO <= Special_case_SBO;
|
||||
end
|
||||
else if(Special_case_dly_SBO)
|
||||
begin
|
||||
Special_case_dly_SBO <= 1'b1;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Special_case_dly_SBO <= '0;
|
||||
end
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Delay sign for normalization and round //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic Sign_z_DN;
|
||||
logic Sign_z_DP;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
if(Div_start_SI&&Ready_SI)
|
||||
Sign_z_DN = Sign_a_D ^ Sign_b_D;
|
||||
else if(Sqrt_start_SI&&Ready_SI)
|
||||
Sign_z_DN = Sign_a_D;
|
||||
else
|
||||
Sign_z_DN = Sign_z_DP;
|
||||
end
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Sign_z_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Sign_z_DP <= Sign_z_DN;
|
||||
end
|
||||
end
|
||||
|
||||
logic [C_RM-1:0] RM_DN;
|
||||
logic [C_RM-1:0] RM_DP;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
if(Start_S&&Ready_SI)
|
||||
RM_DN = RM_SI;
|
||||
else
|
||||
RM_DN = RM_DP;
|
||||
end
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
RM_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
RM_DP <= RM_DN;
|
||||
end
|
||||
end
|
||||
assign RM_dly_SO = RM_DP;
|
||||
|
||||
logic [5:0] Mant_leadingOne_a, Mant_leadingOne_b;
|
||||
logic Mant_zero_S_a,Mant_zero_S_b;
|
||||
|
||||
lzc #(
|
||||
.WIDTH ( C_MANT_FP64+1 ),
|
||||
.MODE ( 1 )
|
||||
) LOD_Ua (
|
||||
.in_i ( Mant_a_D ),
|
||||
.cnt_o ( Mant_leadingOne_a ),
|
||||
.empty_o ( Mant_zero_S_a )
|
||||
);
|
||||
|
||||
logic [C_MANT_FP64:0] Mant_a_norm_DN,Mant_a_norm_DP;
|
||||
|
||||
assign Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP;
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Mant_a_norm_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Mant_a_norm_DP<=Mant_a_norm_DN;
|
||||
end
|
||||
end
|
||||
|
||||
logic [C_EXP_FP64:0] Exp_a_norm_DN,Exp_a_norm_DP;
|
||||
assign Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP; //Covering the process of denormal numbers
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Exp_a_norm_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Exp_a_norm_DP<=Exp_a_norm_DN;
|
||||
end
|
||||
end
|
||||
|
||||
lzc #(
|
||||
.WIDTH ( C_MANT_FP64+1 ),
|
||||
.MODE ( 1 )
|
||||
) LOD_Ub (
|
||||
.in_i ( Mant_b_D ),
|
||||
.cnt_o ( Mant_leadingOne_b ),
|
||||
.empty_o ( Mant_zero_S_b )
|
||||
);
|
||||
|
||||
|
||||
logic [C_MANT_FP64:0] Mant_b_norm_DN,Mant_b_norm_DP;
|
||||
|
||||
assign Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP;
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Mant_b_norm_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Mant_b_norm_DP<=Mant_b_norm_DN;
|
||||
end
|
||||
end
|
||||
|
||||
logic [C_EXP_FP64:0] Exp_b_norm_DN,Exp_b_norm_DP;
|
||||
assign Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; //Covering the process of denormal numbers
|
||||
|
||||
always_ff @(posedge Clk_CI, negedge Rst_RBI)
|
||||
begin
|
||||
if(~Rst_RBI)
|
||||
begin
|
||||
Exp_b_norm_DP <= '0;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Exp_b_norm_DP<=Exp_b_norm_DN;
|
||||
end
|
||||
end
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Output assignments //
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Each output below is a plain continuous assignment from the like-named
// internal signal; no logic is applied at this point.
// Start_SO forwards Start_S directly.
assign Start_SO=Start_S;
|
||||
// Normalized exponents and mantissas are taken from the *_norm_DP registers,
// so they change only after a clock edge on which Start_S && Ready_SI was true.
assign Exp_a_DO_norm=Exp_a_norm_DP;
|
||||
assign Exp_b_DO_norm=Exp_b_norm_DP;
|
||||
assign Mant_a_DO_norm=Mant_a_norm_DP;
|
||||
assign Mant_b_DO_norm=Mant_b_norm_DP;
|
||||
// Result sign and operand classification flags.
// NOTE(review): the *_DP / *_SP suffix suggests these sources are registered
// as well - their driving logic is outside this excerpt, confirm there.
assign Sign_z_DO=Sign_z_DP;
|
||||
assign Inf_a_SO=Inf_a_SP;
|
||||
assign Inf_b_SO=Inf_b_SP;
|
||||
assign Zero_a_SO=Zero_a_SP;
|
||||
assign Zero_b_SO=Zero_b_SP;
|
||||
assign NaN_a_SO=NaN_a_SP;
|
||||
assign NaN_b_SO=NaN_b_SP;
|
||||
assign SNaN_SO=SNaN_SP;
|
||||
|
||||
endmodule
|
14
vendor/openhwgroup_cvfpu.lock.hjson
vendored
14
vendor/openhwgroup_cvfpu.lock.hjson
vendored
|
@ -1,14 +0,0 @@
|
|||
// Copyright lowRISC contributors.
|
||||
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// This file is generated by the util/vendor script. Please do not modify it
|
||||
// manually.
|
||||
|
||||
{
|
||||
upstream:
|
||||
{
|
||||
url: https://github.com/openhwgroup/cvfpu
|
||||
rev: 3116391bf66660f806b45e212b9949c528b4e270
|
||||
}
|
||||
}
|
34
vendor/openhwgroup_cvfpu.vendor.hjson
vendored
34
vendor/openhwgroup_cvfpu.vendor.hjson
vendored
|
@ -1,34 +0,0 @@
|
|||
// -*- coding: utf-8 -*-
|
||||
// Copyright (C) 2022 Thales DIS France SAS
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0.
|
||||
// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
|
||||
|
||||
{
|
||||
// Name of the project
|
||||
name: "openhwgroup_cvfpu",
|
||||
|
||||
// Target directory: relative to the location of this script.
|
||||
target_dir: "openhwgroup/cvfpu",
|
||||
|
||||
// Upstream repository
|
||||
upstream: {
|
||||
// URL
|
||||
url: "https://github.com/openhwgroup/cvfpu",
|
||||
// revision
|
||||
rev: "v0.7.0",
|
||||
}
|
||||
|
||||
// Patch dir for local changes
|
||||
patch_dir: "patches/openhwgroup/cvfpu",
|
||||
|
||||
// Exclusions from upstream content
|
||||
exclude_from_upstream: [
|
||||
".gitmodules",
|
||||
"Bender.yml",
|
||||
"docs",
|
||||
"ips_list.yml",
|
||||
"src_files.yml",
|
||||
"tb",
|
||||
]
|
||||
}
|
||||
|
14
vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson
vendored
14
vendor/pulp-platform_fpu_div_sqrt_mvp.lock.hjson
vendored
|
@ -1,14 +0,0 @@
|
|||
// Copyright lowRISC contributors.
|
||||
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// This file is generated by the util/vendor script. Please do not modify it
|
||||
// manually.
|
||||
|
||||
{
|
||||
upstream:
|
||||
{
|
||||
url: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
|
||||
rev: 86e1f558b3c95e91577c41b2fc452c86b04e85ac
|
||||
}
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
// -*- coding: utf-8 -*-
|
||||
// Copyright (C) 2022 Thales DIS France SAS
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0.
|
||||
// Original Author: Zbigniew Chamski (zbigniew.chamski@thalesgroup.com)
|
||||
|
||||
{
|
||||
// Name of the project
|
||||
name: "pulp_fpu_div_sqrt_mvp",
|
||||
|
||||
// Target directory: relative to the location of this script.
|
||||
target_dir: "openhwgroup/cvfpu/src/fpu_div_sqrt_mvp",
|
||||
|
||||
// Upstream repository
|
||||
upstream: {
|
||||
// URL
|
||||
url: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git",
|
||||
// revision
|
||||
rev: "v1.0.4",
|
||||
}
|
||||
|
||||
// Patch dir for local changes
|
||||
patch_dir: "patches/pulp-platform/fpu_div_sqrt_mvp",
|
||||
|
||||
// Exclusions from upstream content
|
||||
exclude_from_upstream: [
|
||||
".git",
|
||||
"Bender.yml",
|
||||
"document",
|
||||
"src_files.yml",
|
||||
]
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue