vector ISA updates

This commit is contained in:
tinebp 2024-12-05 14:43:51 -08:00
parent 5d91fe58ad
commit 6b23d290c3
13 changed files with 858 additions and 859 deletions

View file

@ -33,110 +33,103 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <assert.h>
#include <stdbool.h>
#include <internals.h>
#include <../RISCV/specialize.h>
#include <softfloat.h>
#include "softfloat_ext.h"
#include <../RISCV/specialize.h>
#include <assert.h>
#include <internals.h>
#include <softfloat.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif

// Classifies a half-precision value and returns a mask with exactly one of
// the following bits set (same layout as the RISC-V FCLASS result):
//   bit 0: -infinity      bit 5: +subnormal
//   bit 1: -normal        bit 6: +normal
//   bit 2: -subnormal     bit 7: +infinity
//   bit 3: -zero          bit 8: signaling NaN
//   bit 4: +zero          bit 9: quiet NaN
uint_fast16_t f16_classify(float16_t a) {
  union ui16_f16 uA;
  uint_fast16_t uiA;

  // Reinterpret the float as its raw bit pattern before inspecting fields.
  uA.f = a;
  uiA = uA.ui;

  uint_fast16_t infOrNaN = expF16UI(uiA) == 0x1F;    // all-ones exponent
  uint_fast16_t subnormalOrZero = expF16UI(uiA) == 0; // all-zeros exponent
  bool sign = signF16UI(uiA);
  bool fracZero = fracF16UI(uiA) == 0;
  bool isNaN = isNaNF16UI(uiA);
  bool isSNaN = softfloat_isSigNaNF16UI(uiA);

  return (sign && infOrNaN && fracZero) << 0 |
         (sign && !infOrNaN && !subnormalOrZero) << 1 |
         (sign && subnormalOrZero && !fracZero) << 2 |
         (sign && subnormalOrZero && fracZero) << 3 |
         (!sign && infOrNaN && fracZero) << 7 |
         (!sign && !infOrNaN && !subnormalOrZero) << 6 |
         (!sign && subnormalOrZero && !fracZero) << 5 |
         (!sign && subnormalOrZero && fracZero) << 4 |
         (isNaN && isSNaN) << 8 |
         (isNaN && !isSNaN) << 9;
}
// Classifies a single-precision value and returns a mask with exactly one of
// the following bits set (same layout as the RISC-V FCLASS result):
//   bit 0: -infinity      bit 5: +subnormal
//   bit 1: -normal        bit 6: +normal
//   bit 2: -subnormal     bit 7: +infinity
//   bit 3: -zero          bit 8: signaling NaN
//   bit 4: +zero          bit 9: quiet NaN
uint_fast16_t f32_classify(float32_t a) {
  union ui32_f32 uA;
  uint_fast32_t uiA;

  // Reinterpret the float as its raw bit pattern before inspecting fields.
  uA.f = a;
  uiA = uA.ui;

  uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;    // all-ones exponent
  uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0; // all-zeros exponent
  bool sign = signF32UI(uiA);
  bool fracZero = fracF32UI(uiA) == 0;
  bool isNaN = isNaNF32UI(uiA);
  bool isSNaN = softfloat_isSigNaNF32UI(uiA);

  return (sign && infOrNaN && fracZero) << 0 |
         (sign && !infOrNaN && !subnormalOrZero) << 1 |
         (sign && subnormalOrZero && !fracZero) << 2 |
         (sign && subnormalOrZero && fracZero) << 3 |
         (!sign && infOrNaN && fracZero) << 7 |
         (!sign && !infOrNaN && !subnormalOrZero) << 6 |
         (!sign && subnormalOrZero && !fracZero) << 5 |
         (!sign && subnormalOrZero && fracZero) << 4 |
         (isNaN && isSNaN) << 8 |
         (isNaN && !isSNaN) << 9;
}
// Classifies a double-precision value and returns a mask with exactly one of
// the following bits set (same layout as the RISC-V FCLASS result):
//   bit 0: -infinity      bit 5: +subnormal
//   bit 1: -normal        bit 6: +normal
//   bit 2: -subnormal     bit 7: +infinity
//   bit 3: -zero          bit 8: signaling NaN
//   bit 4: +zero          bit 9: quiet NaN
uint_fast16_t f64_classify(float64_t a) {
  union ui64_f64 uA;
  uint_fast64_t uiA;

  // Reinterpret the float as its raw bit pattern before inspecting fields.
  uA.f = a;
  uiA = uA.ui;

  uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;   // all-ones exponent
  uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0; // all-zeros exponent
  bool sign = signF64UI(uiA);
  bool fracZero = fracF64UI(uiA) == 0;
  bool isNaN = isNaNF64UI(uiA);
  bool isSNaN = softfloat_isSigNaNF64UI(uiA);

  return (sign && infOrNaN && fracZero) << 0 |
         (sign && !infOrNaN && !subnormalOrZero) << 1 |
         (sign && subnormalOrZero && !fracZero) << 2 |
         (sign && subnormalOrZero && fracZero) << 3 |
         (!sign && infOrNaN && fracZero) << 7 |
         (!sign && !infOrNaN && !subnormalOrZero) << 6 |
         (!sign && subnormalOrZero && !fracZero) << 5 |
         (!sign && subnormalOrZero && fracZero) << 4 |
         (isNaN && isSNaN) << 8 |
         (isNaN && !isSNaN) << 9;
}
// Returns the `len`-bit field of `val` that starts at bit `pos`, shifted down
// so the field occupies the low bits of the result.
// Requires pos >= 0, len > 0 and pos + len <= 64 (checked by assert); len > 0
// also keeps the mask shift below 64.
static inline uint64_t extract64(uint64_t val, int pos, int len) {
  assert(pos >= 0 && len > 0 && len <= 64 - pos);
  return (val >> pos) & (~UINT64_C(0) >> (64 - len));
}
// Builds a mask of `len` consecutive one bits whose lowest bit is bit `pos`.
// Requires 0 <= pos < 64 and 0 < len <= 64 (checked by assert). Bits that
// would land above bit 63 are shifted out and dropped.
static inline uint64_t make_mask64(int pos, int len) {
  assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
  return (UINT64_MAX >> (64 - len)) << pos;
}
// Reciprocal square-root estimate on a raw floating-point bit pattern.
//   val: encoded value (sign | exponent | fraction)
//   e:   number of exponent bits
//   s:   number of fraction bits
//   sub: true when `val` encodes a subnormal, so the fraction is normalized
//        before the lookup
// The estimate comes from a 128-entry table of 7-bit values, indexed by the
// low exponent bit and the top six fraction bits.
// The result is assembled in 64 bits; the user needs to truncate the output
// to the required length.
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
  uint64_t exp = extract64(val, s, e);
  uint64_t sig = extract64(val, 0, s);
  uint64_t sign = extract64(val, s + e, 1);
  const int p = 7;

  static const uint8_t table[] = {
      52,  51,  50,  48,  47,  46,  44,  43,
      42,  41,  40,  39,  38,  36,  35,  34,
      33,  32,  31,  30,  30,  29,  28,  27,
      26,  25,  24,  23,  23,  22,  21,  20,
      19,  19,  18,  17,  16,  16,  15,  14,
      14,  13,  12,  12,  11,  10,  10,  9,
      9,   8,   7,   7,   6,   6,   5,   4,
      4,   3,   3,   2,   2,   1,   1,   0,
      127, 125, 123, 121, 119, 118, 116, 114,
      113, 111, 109, 108, 106, 105, 103, 102,
      100, 99,  97,  96,  95,  93,  92,  91,
      90,  88,  87,  86,  85,  84,  83,  82,
      80,  79,  78,  77,  76,  75,  74,  73,
      72,  71,  70,  70,  69,  68,  67,  66,
      65,  64,  63,  63,  62,  61,  60,  59,
      59,  58,  57,  56,  56,  55,  54,  53};

  if (sub) {
    // Shift until the leading one reaches bit s-1. `exp` is unsigned, so each
    // decrement below zero wraps; the `~exp` term below absorbs that.
    while (extract64(sig, s - 1, 1) == 0)
      exp--, sig <<= 1;

    // Drop the now-implicit leading one and keep only `s` fraction bits.
    sig = (sig << 1) & make_mask64(0, s);
  }

  int idx = ((exp & 1) << (p - 1)) | (sig >> (s - p + 1));
  uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
  uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;

  return (sign << (s + e)) | (out_exp << s) | out_sig;
}
// Half-precision reciprocal square-root estimate.
// Special inputs are resolved here from the f16_classify mask; only positive
// normal and subnormal values reach the table-based rsqrte7 helper.
//   negative (other than -0) or sNaN -> default NaN, invalid flag raised
//   qNaN                             -> default NaN, no flag
//   -0 / +0                          -> -inf / +inf, infinite flag raised
//   +inf                             -> +0
float16_t f16_rsqrte7(float16_t in) {
  union ui16_f16 uA;

  uA.f = in;
  unsigned int ret = f16_classify(in);
  bool sub = false;
  switch (ret) {
  case 0x001: // -inf
  case 0x002: // -normal
  case 0x004: // -subnormal
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF16UI;
    break;
  case 0x008: // -0
    uA.ui = 0xfc00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7c00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +normal
    // 5 exponent bits, 10 fraction bits.
    uA.ui = rsqrte7(uA.ui, 5, 10, sub);
    break;
  }

  return uA.f;
}
// Single-precision reciprocal square-root estimate.
// Special inputs are resolved here from the f32_classify mask; only positive
// normal and subnormal values reach the table-based rsqrte7 helper.
//   negative (other than -0) or sNaN -> default NaN, invalid flag raised
//   qNaN                             -> default NaN, no flag
//   -0 / +0                          -> -inf / +inf, infinite flag raised
//   +inf                             -> +0
float32_t f32_rsqrte7(float32_t in) {
  union ui32_f32 uA;

  uA.f = in;
  unsigned int ret = f32_classify(in);
  bool sub = false;
  switch (ret) {
  case 0x001: // -inf
  case 0x002: // -normal
  case 0x004: // -subnormal
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF32UI;
    break;
  case 0x008: // -0
    uA.ui = 0xff800000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7f800000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +normal
    // 8 exponent bits, 23 fraction bits.
    uA.ui = rsqrte7(uA.ui, 8, 23, sub);
    break;
  }

  return uA.f;
}
// Double-precision reciprocal square-root estimate.
// Special inputs are resolved here from the f64_classify mask; only positive
// normal and subnormal values reach the table-based rsqrte7 helper.
//   negative (other than -0) or sNaN -> default NaN, invalid flag raised
//   qNaN                             -> default NaN, no flag
//   -0 / +0                          -> -inf / +inf, infinite flag raised
//   +inf                             -> +0
float64_t f64_rsqrte7(float64_t in) {
  union ui64_f64 uA;

  uA.f = in;
  unsigned int ret = f64_classify(in);
  bool sub = false;
  switch (ret) {
  case 0x001: // -inf
  case 0x002: // -normal
  case 0x004: // -subnormal
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF64UI;
    break;
  case 0x008: // -0
    uA.ui = 0xfff0000000000000ul;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7ff0000000000000ul;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +normal
    // 11 exponent bits, 52 fraction bits.
    uA.ui = rsqrte7(uA.ui, 11, 52, sub);
    break;
  }

  return uA.f;
}
// Reciprocal estimate on a raw floating-point bit pattern.
//   val: encoded value (sign | exponent | fraction)
//   e:   number of exponent bits
//   s:   number of fraction bits
//   rm:  rounding mode, consulted only when a subnormal input overflows
//        (NOTE(review): 1/2/3 are presumably softfloat's minMag/min/max
//        encodings -- confirm against softfloat.h)
//   sub: true when `val` encodes a subnormal
//   round_abnormal: set to true when the overflow path below is taken; it is
//        never cleared here, so the caller must initialize it
// The estimate comes from a 128-entry table of 7-bit values indexed by the
// top seven fraction bits.
// The result is assembled in 64 bits; the user needs to truncate the output
// to the required length.
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
                              bool *round_abnormal) {
  uint64_t exp = extract64(val, s, e);
  uint64_t sig = extract64(val, 0, s);
  uint64_t sign = extract64(val, s + e, 1);
  const int p = 7;

  static const uint8_t table[] = {
      127, 125, 123, 121, 119, 117, 116, 114,
      112, 110, 109, 107, 105, 104, 102, 100,
      99,  97,  96,  94,  93,  91,  90,  88,
      87,  85,  84,  83,  81,  80,  79,  77,
      76,  75,  74,  72,  71,  70,  69,  68,
      66,  65,  64,  63,  62,  61,  60,  59,
      58,  57,  56,  55,  54,  53,  52,  51,
      50,  49,  48,  47,  46,  45,  44,  43,
      42,  41,  40,  40,  39,  38,  37,  36,
      35,  35,  34,  33,  32,  31,  31,  30,
      29,  28,  28,  27,  26,  25,  25,  24,
      23,  23,  22,  21,  21,  20,  19,  19,
      18,  17,  17,  16,  15,  15,  14,  14,
      13,  12,  12,  11,  11,  10,  9,   9,
      8,   8,   7,   7,   6,   5,   5,   4,
      4,   3,   3,   2,   2,   1,   1,   0};

  if (sub) {
    // Shift until the leading one reaches bit s-1. `exp` is unsigned, so each
    // decrement below zero wraps (one shift leaves it at UINT64_MAX).
    while (extract64(sig, s - 1, 1) == 0)
      exp--, sig <<= 1;

    // Drop the now-implicit leading one and keep only `s` fraction bits.
    sig = (sig << 1) & make_mask64(0, s);

    // More than one normalization shift: return either the all-ones exponent
    // pattern (infinity) or, for the listed rounding modes, that pattern
    // minus one (largest finite magnitude).
    if (exp != 0 && exp != UINT64_MAX) {
      *round_abnormal = true;
      if (rm == 1 || (rm == 2 && !sign) || (rm == 3 && sign))
        return ((sign << (s + e)) | make_mask64(s, e)) - 1;
      else
        return (sign << (s + e)) | make_mask64(s, e);
    }
  }

  int idx = sig >> (s - p);
  uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
  uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;

  // Output exponent of 0 or -1 (wrapped): emit a subnormal by making the
  // leading one explicit and shifting the fraction right accordingly.
  if (out_exp == 0 || out_exp == UINT64_MAX) {
    out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
    if (out_exp == UINT64_MAX) {
      out_sig >>= 1;
      out_exp = 0;
    }
  }

  return (sign << (s + e)) | (out_exp << s) | out_sig;
}
// Half-precision reciprocal estimate.
// Special inputs are resolved here from the f16_classify mask; normal and
// subnormal values of either sign go through the table-based recip7 helper.
//   -inf / +inf -> -0 / +0
//   -0 / +0     -> -inf / +inf, infinite flag raised
//   sNaN        -> default NaN, invalid flag raised
//   qNaN        -> default NaN, no flag
// When recip7 reports an overflowing subnormal input, the inexact and
// overflow flags are raised.
float16_t f16_recip7(float16_t in) {
  union ui16_f16 uA;

  uA.f = in;
  unsigned int ret = f16_classify(in);
  bool sub = false;
  bool round_abnormal = false;
  switch (ret) {
  case 0x001: // -inf
    uA.ui = 0x8000;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x008: // -0
    uA.ui = 0xfc00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7c00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF16UI;
    break;
  case 0x004: // -subnormal
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +- normal
    // 5 exponent bits, 10 fraction bits.
    uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
    if (round_abnormal)
      softfloat_exceptionFlags |=
          softfloat_flag_inexact | softfloat_flag_overflow;
    break;
  }

  return uA.f;
}
// Single-precision reciprocal estimate.
// Special inputs are resolved here from the f32_classify mask; normal and
// subnormal values of either sign go through the table-based recip7 helper.
//   -inf / +inf -> -0 / +0
//   -0 / +0     -> -inf / +inf, infinite flag raised
//   sNaN        -> default NaN, invalid flag raised
//   qNaN        -> default NaN, no flag
// When recip7 reports an overflowing subnormal input, the inexact and
// overflow flags are raised.
float32_t f32_recip7(float32_t in) {
  union ui32_f32 uA;

  uA.f = in;
  unsigned int ret = f32_classify(in);
  bool sub = false;
  bool round_abnormal = false;
  switch (ret) {
  case 0x001: // -inf
    uA.ui = 0x80000000;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x008: // -0
    uA.ui = 0xff800000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7f800000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF32UI;
    break;
  case 0x004: // -subnormal
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +- normal
    // 8 exponent bits, 23 fraction bits.
    uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
    if (round_abnormal)
      softfloat_exceptionFlags |=
          softfloat_flag_inexact | softfloat_flag_overflow;
    break;
  }

  return uA.f;
}
// Double-precision reciprocal estimate.
// Special inputs are resolved here from the f64_classify mask; normal and
// subnormal values of either sign go through the table-based recip7 helper.
//   -inf / +inf -> -0 / +0
//   -0 / +0     -> -inf / +inf, infinite flag raised
//   sNaN        -> default NaN, invalid flag raised
//   qNaN        -> default NaN, no flag
// When recip7 reports an overflowing subnormal input, the inexact and
// overflow flags are raised.
float64_t f64_recip7(float64_t in) {
  union ui64_f64 uA;

  uA.f = in;
  unsigned int ret = f64_classify(in);
  bool sub = false;
  bool round_abnormal = false;
  switch (ret) {
  case 0x001: // -inf
    uA.ui = 0x8000000000000000;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x008: // -0
    uA.ui = 0xfff0000000000000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7ff0000000000000;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF64UI;
    break;
  case 0x004: // -subnormal
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +- normal
    // 11 exponent bits, 52 fraction bits.
    uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
    if (round_abnormal)
      softfloat_exceptionFlags |=
          softfloat_flag_inexact | softfloat_flag_overflow;
    break;
  }

  return uA.f;
}
#ifdef __cplusplus
}
#endif