mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
vector ISA updates
This commit is contained in:
parent
5d91fe58ad
commit
6b23d290c3
13 changed files with 858 additions and 859 deletions
|
@ -394,7 +394,7 @@ vector()
|
|||
{
|
||||
echo "begin vector tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/simx clean && CONFIGS="-DEXT_V_ENABLE" make -C sim/simx
|
||||
TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh
|
||||
|
||||
echo "vector tests done!"
|
||||
|
|
|
@ -830,6 +830,12 @@
|
|||
`define EXT_M_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_V_ENABLE
|
||||
`define EXT_V_ENABLED 1
|
||||
`else
|
||||
`define EXT_V_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
`define EXT_ZICOND_ENABLED 1
|
||||
`else
|
||||
|
@ -846,7 +852,7 @@
|
|||
`define ISA_STD_N 13
|
||||
`define ISA_STD_Q 16
|
||||
`define ISA_STD_S 18
|
||||
`define ISA_STD_U 20
|
||||
`define ISA_STD_V 21
|
||||
|
||||
`define ISA_EXT_ICACHE 0
|
||||
`define ISA_EXT_DCACHE 1
|
||||
|
@ -883,7 +889,7 @@
|
|||
| (0 << 18) /* S - Supervisor mode implemented */ \
|
||||
| (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
|
||||
| (1 << 20) /* U - User mode implemented */ \
|
||||
| (0 << 21) /* V - Tentatively reserved for Vector extension */ \
|
||||
| (`EXT_V_ENABLED << 21) /* V - Vector extension implemented (set when EXT_V_ENABLE is defined) */ \
|
||||
| (0 << 22) /* W - Reserved */ \
|
||||
| (1 << 23) /* X - Non-standard extensions present */ \
|
||||
| (0 << 24) /* Y - Reserved */ \
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -12,11 +12,11 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include "rvfloats.h"
|
||||
#include "softfloat_ext.h"
|
||||
#include <stdio.h>
|
||||
|
||||
extern "C" {
|
||||
#include <softfloat.h>
|
||||
#include "softfloat_ext.h"
|
||||
#include <internals.h>
|
||||
#include <../RISCV/specialize.h>
|
||||
}
|
||||
|
@ -344,7 +344,7 @@ bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
|||
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_eq(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -355,11 +355,11 @@ bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
auto fa = to_float32_t(a);
|
||||
auto fb = to_float32_t(b);
|
||||
|
@ -374,11 +374,11 @@ uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
auto fa = to_float64_t(a);
|
||||
auto fb = to_float64_t(b);
|
||||
|
@ -397,7 +397,7 @@ uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
|||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
auto fa = to_float32_t(a);
|
||||
auto fb = to_float32_t(b);
|
||||
|
@ -416,7 +416,7 @@ uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
|||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
auto fa = to_float64_t(a);
|
||||
auto fb = to_float64_t(b);
|
||||
|
@ -449,8 +449,8 @@ uint32_t rv_fclss_s(uint32_t a) {
|
|||
( !sign && subnormOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -472,8 +472,8 @@ uint32_t rv_fclss_d(uint64_t a) {
|
|||
( !sign && subnormOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -483,7 +483,7 @@ uint32_t rv_fsgnj_s(uint32_t a, uint32_t b) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
|
||||
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
|
||||
auto sign = b & F64_SIGN;
|
||||
auto r = sign | (a & ~F64_SIGN);
|
||||
return r;
|
||||
|
@ -495,7 +495,7 @@ uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
|
||||
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
|
||||
auto sign = ~b & F64_SIGN;
|
||||
auto r = sign | (a & ~F64_SIGN);
|
||||
return r;
|
||||
|
@ -508,7 +508,7 @@ uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
|
||||
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
|
||||
auto sign1 = a & F64_SIGN;
|
||||
auto sign2 = b & F64_SIGN;
|
||||
auto r = (sign1 ^ sign2) | (a & ~F64_SIGN);
|
||||
|
|
|
@ -33,110 +33,103 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
=============================================================================*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <internals.h>
|
||||
#include <../RISCV/specialize.h>
|
||||
#include <softfloat.h>
|
||||
#include "softfloat_ext.h"
|
||||
#include <../RISCV/specialize.h>
|
||||
#include <assert.h>
|
||||
#include <internals.h>
|
||||
#include <softfloat.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
uint_fast16_t f16_classify( float16_t a )
|
||||
{
|
||||
union ui16_f16 uA;
|
||||
uint_fast16_t uiA;
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
uint_fast16_t f16_classify(float16_t a) {
|
||||
union ui16_f16 uA;
|
||||
uint_fast16_t uiA;
|
||||
|
||||
uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F;
|
||||
uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0;
|
||||
bool sign = signF16UI( uiA );
|
||||
bool fracZero = fracF16UI( uiA ) == 0;
|
||||
bool isNaN = isNaNF16UI( uiA );
|
||||
bool isSNaN = softfloat_isSigNaNF16UI( uiA );
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
|
||||
return
|
||||
( sign && infOrNaN && fracZero ) << 0 |
|
||||
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
|
||||
( sign && subnormalOrZero && !fracZero ) << 2 |
|
||||
( sign && subnormalOrZero && fracZero ) << 3 |
|
||||
( !sign && infOrNaN && fracZero ) << 7 |
|
||||
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
|
||||
( !sign && subnormalOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormalOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
uint_fast16_t infOrNaN = expF16UI(uiA) == 0x1F;
|
||||
uint_fast16_t subnormalOrZero = expF16UI(uiA) == 0;
|
||||
bool sign = signF16UI(uiA);
|
||||
bool fracZero = fracF16UI(uiA) == 0;
|
||||
bool isNaN = isNaNF16UI(uiA);
|
||||
bool isSNaN = softfloat_isSigNaNF16UI(uiA);
|
||||
|
||||
return (sign && infOrNaN && fracZero) << 0 |
|
||||
(sign && !infOrNaN && !subnormalOrZero) << 1 |
|
||||
(sign && subnormalOrZero && !fracZero) << 2 |
|
||||
(sign && subnormalOrZero && fracZero) << 3 |
|
||||
(!sign && infOrNaN && fracZero) << 7 |
|
||||
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
|
||||
(!sign && subnormalOrZero && !fracZero) << 5 |
|
||||
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
|
||||
(isNaN && !isSNaN) << 9;
|
||||
}
|
||||
|
||||
uint_fast16_t f32_classify( float32_t a )
|
||||
{
|
||||
union ui32_f32 uA;
|
||||
uint_fast32_t uiA;
|
||||
uint_fast16_t f32_classify(float32_t a) {
|
||||
union ui32_f32 uA;
|
||||
uint_fast32_t uiA;
|
||||
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
|
||||
uint_fast16_t infOrNaN = expF32UI( uiA ) == 0xFF;
|
||||
uint_fast16_t subnormalOrZero = expF32UI( uiA ) == 0;
|
||||
bool sign = signF32UI( uiA );
|
||||
bool fracZero = fracF32UI( uiA ) == 0;
|
||||
bool isNaN = isNaNF32UI( uiA );
|
||||
bool isSNaN = softfloat_isSigNaNF32UI( uiA );
|
||||
uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;
|
||||
uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0;
|
||||
bool sign = signF32UI(uiA);
|
||||
bool fracZero = fracF32UI(uiA) == 0;
|
||||
bool isNaN = isNaNF32UI(uiA);
|
||||
bool isSNaN = softfloat_isSigNaNF32UI(uiA);
|
||||
|
||||
return
|
||||
( sign && infOrNaN && fracZero ) << 0 |
|
||||
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
|
||||
( sign && subnormalOrZero && !fracZero ) << 2 |
|
||||
( sign && subnormalOrZero && fracZero ) << 3 |
|
||||
( !sign && infOrNaN && fracZero ) << 7 |
|
||||
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
|
||||
( !sign && subnormalOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormalOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
return (sign && infOrNaN && fracZero) << 0 |
|
||||
(sign && !infOrNaN && !subnormalOrZero) << 1 |
|
||||
(sign && subnormalOrZero && !fracZero) << 2 |
|
||||
(sign && subnormalOrZero && fracZero) << 3 |
|
||||
(!sign && infOrNaN && fracZero) << 7 |
|
||||
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
|
||||
(!sign && subnormalOrZero && !fracZero) << 5 |
|
||||
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
|
||||
(isNaN && !isSNaN) << 9;
|
||||
}
|
||||
|
||||
uint_fast16_t f64_classify( float64_t a )
|
||||
{
|
||||
union ui64_f64 uA;
|
||||
uint_fast64_t uiA;
|
||||
uint_fast16_t f64_classify(float64_t a) {
|
||||
union ui64_f64 uA;
|
||||
uint_fast64_t uiA;
|
||||
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
uA.f = a;
|
||||
uiA = uA.ui;
|
||||
|
||||
uint_fast16_t infOrNaN = expF64UI( uiA ) == 0x7FF;
|
||||
uint_fast16_t subnormalOrZero = expF64UI( uiA ) == 0;
|
||||
bool sign = signF64UI( uiA );
|
||||
bool fracZero = fracF64UI( uiA ) == 0;
|
||||
bool isNaN = isNaNF64UI( uiA );
|
||||
bool isSNaN = softfloat_isSigNaNF64UI( uiA );
|
||||
uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;
|
||||
uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0;
|
||||
bool sign = signF64UI(uiA);
|
||||
bool fracZero = fracF64UI(uiA) == 0;
|
||||
bool isNaN = isNaNF64UI(uiA);
|
||||
bool isSNaN = softfloat_isSigNaNF64UI(uiA);
|
||||
|
||||
return
|
||||
( sign && infOrNaN && fracZero ) << 0 |
|
||||
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
|
||||
( sign && subnormalOrZero && !fracZero ) << 2 |
|
||||
( sign && subnormalOrZero && fracZero ) << 3 |
|
||||
( !sign && infOrNaN && fracZero ) << 7 |
|
||||
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
|
||||
( !sign && subnormalOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormalOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
return (sign && infOrNaN && fracZero) << 0 |
|
||||
(sign && !infOrNaN && !subnormalOrZero) << 1 |
|
||||
(sign && subnormalOrZero && !fracZero) << 2 |
|
||||
(sign && subnormalOrZero && fracZero) << 3 |
|
||||
(!sign && infOrNaN && fracZero) << 7 |
|
||||
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
|
||||
(!sign && subnormalOrZero && !fracZero) << 5 |
|
||||
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
|
||||
(isNaN && !isSNaN) << 9;
|
||||
}
|
||||
|
||||
static inline uint64_t extract64(uint64_t val, int pos, int len)
|
||||
{
|
||||
static inline uint64_t extract64(uint64_t val, int pos, int len) {
|
||||
assert(pos >= 0 && len > 0 && len <= 64 - pos);
|
||||
return (val >> pos) & (~UINT64_C(0) >> (64 - len));
|
||||
}
|
||||
|
||||
static inline uint64_t make_mask64(int pos, int len)
|
||||
{
|
||||
assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
|
||||
return (UINT64_MAX >> (64 - len)) << pos;
|
||||
static inline uint64_t make_mask64(int pos, int len) {
|
||||
assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
|
||||
return (UINT64_MAX >> (64 - len)) << pos;
|
||||
}
|
||||
|
||||
//user needs to truncate output to required length
|
||||
// user needs to truncate output to required length
|
||||
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
|
||||
uint64_t exp = extract64(val, s, e);
|
||||
uint64_t sig = extract64(val, 0, s);
|
||||
|
@ -144,343 +137,320 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
|
|||
const int p = 7;
|
||||
|
||||
static const uint8_t table[] = {
|
||||
52, 51, 50, 48, 47, 46, 44, 43,
|
||||
42, 41, 40, 39, 38, 36, 35, 34,
|
||||
33, 32, 31, 30, 30, 29, 28, 27,
|
||||
26, 25, 24, 23, 23, 22, 21, 20,
|
||||
19, 19, 18, 17, 16, 16, 15, 14,
|
||||
14, 13, 12, 12, 11, 10, 10, 9,
|
||||
9, 8, 7, 7, 6, 6, 5, 4,
|
||||
4, 3, 3, 2, 2, 1, 1, 0,
|
||||
127, 125, 123, 121, 119, 118, 116, 114,
|
||||
113, 111, 109, 108, 106, 105, 103, 102,
|
||||
100, 99, 97, 96, 95, 93, 92, 91,
|
||||
90, 88, 87, 86, 85, 84, 83, 82,
|
||||
80, 79, 78, 77, 76, 75, 74, 73,
|
||||
72, 71, 70, 70, 69, 68, 67, 66,
|
||||
65, 64, 63, 63, 62, 61, 60, 59,
|
||||
59, 58, 57, 56, 56, 55, 54, 53};
|
||||
52, 51, 50, 48, 47, 46, 44, 43, 42, 41, 40, 39, 38, 36, 35,
|
||||
34, 33, 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22,
|
||||
21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11,
|
||||
10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 4, 4, 3, 3, 2,
|
||||
2, 1, 1, 0, 127, 125, 123, 121, 119, 118, 116, 114, 113, 111, 109,
|
||||
108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88,
|
||||
87, 86, 85, 84, 83, 82, 80, 79, 78, 77, 76, 75, 74, 73, 72,
|
||||
71, 70, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 59,
|
||||
59, 58, 57, 56, 56, 55, 54, 53};
|
||||
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
|
||||
sig = (sig << 1) & make_mask64(0 ,s);
|
||||
sig = (sig << 1) & make_mask64(0, s);
|
||||
}
|
||||
|
||||
int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1));
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
|
||||
int idx = ((exp & 1) << (p - 1)) | (sig >> (s - p + 1));
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
|
||||
uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;
|
||||
|
||||
return (sign << (s+e)) | (out_exp << s) | out_sig;
|
||||
return (sign << (s + e)) | (out_exp << s) | out_sig;
|
||||
}
|
||||
|
||||
float16_t f16_rsqrte7(float16_t in)
|
||||
{
|
||||
union ui16_f16 uA;
|
||||
float16_t f16_rsqrte7(float16_t in) {
|
||||
union ui16_f16 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f16_classify(in);
|
||||
bool sub = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF16UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfc00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7c00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 5, 10, sub);
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f16_classify(in);
|
||||
bool sub = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF16UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfc00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7c00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 5, 10, sub);
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
float32_t f32_rsqrte7(float32_t in)
|
||||
{
|
||||
union ui32_f32 uA;
|
||||
float32_t f32_rsqrte7(float32_t in) {
|
||||
union ui32_f32 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f32_classify(in);
|
||||
bool sub = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF32UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xff800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7f800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 8, 23, sub);
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f32_classify(in);
|
||||
bool sub = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF32UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xff800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7f800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 8, 23, sub);
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
float64_t f64_rsqrte7(float64_t in)
|
||||
{
|
||||
union ui64_f64 uA;
|
||||
float64_t f64_rsqrte7(float64_t in) {
|
||||
union ui64_f64 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f64_classify(in);
|
||||
bool sub = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF64UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfff0000000000000ul;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7ff0000000000000ul;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 11, 52, sub);
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f64_classify(in);
|
||||
bool sub = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
case 0x002: // -normal
|
||||
case 0x004: // -subnormal
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF64UI;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfff0000000000000ul;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7ff0000000000000ul;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +num
|
||||
uA.ui = rsqrte7(uA.ui, 11, 52, sub);
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
//user needs to truncate output to required length
|
||||
// user needs to truncate output to required length
|
||||
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
|
||||
bool *round_abnormal)
|
||||
{
|
||||
uint64_t exp = extract64(val, s, e);
|
||||
uint64_t sig = extract64(val, 0, s);
|
||||
uint64_t sign = extract64(val, s + e, 1);
|
||||
const int p = 7;
|
||||
bool *round_abnormal) {
|
||||
uint64_t exp = extract64(val, s, e);
|
||||
uint64_t sig = extract64(val, 0, s);
|
||||
uint64_t sign = extract64(val, s + e, 1);
|
||||
const int p = 7;
|
||||
|
||||
static const uint8_t table[] = {
|
||||
127, 125, 123, 121, 119, 117, 116, 114,
|
||||
112, 110, 109, 107, 105, 104, 102, 100,
|
||||
99, 97, 96, 94, 93, 91, 90, 88,
|
||||
87, 85, 84, 83, 81, 80, 79, 77,
|
||||
76, 75, 74, 72, 71, 70, 69, 68,
|
||||
66, 65, 64, 63, 62, 61, 60, 59,
|
||||
58, 57, 56, 55, 54, 53, 52, 51,
|
||||
50, 49, 48, 47, 46, 45, 44, 43,
|
||||
42, 41, 40, 40, 39, 38, 37, 36,
|
||||
35, 35, 34, 33, 32, 31, 31, 30,
|
||||
29, 28, 28, 27, 26, 25, 25, 24,
|
||||
23, 23, 22, 21, 21, 20, 19, 19,
|
||||
18, 17, 17, 16, 15, 15, 14, 14,
|
||||
13, 12, 12, 11, 11, 10, 9, 9,
|
||||
8, 8, 7, 7, 6, 5, 5, 4,
|
||||
4, 3, 3, 2, 2, 1, 1, 0};
|
||||
static const uint8_t table[] = {
|
||||
127, 125, 123, 121, 119, 117, 116, 114, 112, 110, 109, 107, 105, 104, 102,
|
||||
100, 99, 97, 96, 94, 93, 91, 90, 88, 87, 85, 84, 83, 81, 80,
|
||||
79, 77, 76, 75, 74, 72, 71, 70, 69, 68, 66, 65, 64, 63, 62,
|
||||
61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47,
|
||||
46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34,
|
||||
33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23,
|
||||
22, 21, 21, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13,
|
||||
12, 12, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 5, 5, 4,
|
||||
4, 3, 3, 2, 2, 1, 1, 0};
|
||||
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
|
||||
sig = (sig << 1) & make_mask64(0 ,s);
|
||||
sig = (sig << 1) & make_mask64(0, s);
|
||||
|
||||
if (exp != 0 && exp != UINT64_MAX) {
|
||||
*round_abnormal = true;
|
||||
if (rm == 1 ||
|
||||
(rm == 2 && !sign) ||
|
||||
(rm == 3 && sign))
|
||||
return ((sign << (s+e)) | make_mask64(s, e)) - 1;
|
||||
else
|
||||
return (sign << (s+e)) | make_mask64(s, e);
|
||||
}
|
||||
if (exp != 0 && exp != UINT64_MAX) {
|
||||
*round_abnormal = true;
|
||||
if (rm == 1 || (rm == 2 && !sign) || (rm == 3 && sign))
|
||||
return ((sign << (s + e)) | make_mask64(s, e)) - 1;
|
||||
else
|
||||
return (sign << (s + e)) | make_mask64(s, e);
|
||||
}
|
||||
}
|
||||
|
||||
int idx = sig >> (s-p);
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
|
||||
uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
|
||||
if (out_exp == 0 || out_exp == UINT64_MAX) {
|
||||
out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
|
||||
if (out_exp == UINT64_MAX) {
|
||||
out_sig >>= 1;
|
||||
out_exp = 0;
|
||||
}
|
||||
int idx = sig >> (s - p);
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
|
||||
uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
|
||||
if (out_exp == 0 || out_exp == UINT64_MAX) {
|
||||
out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
|
||||
if (out_exp == UINT64_MAX) {
|
||||
out_sig >>= 1;
|
||||
out_exp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return (sign << (s+e)) | (out_exp << s) | out_sig;
|
||||
return (sign << (s + e)) | (out_exp << s) | out_sig;
|
||||
}
|
||||
|
||||
float16_t f16_recip7(float16_t in)
|
||||
{
|
||||
union ui16_f16 uA;
|
||||
float16_t f16_recip7(float16_t in) {
|
||||
union ui16_f16 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f16_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x8000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfc00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7c00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF16UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 5, 10,
|
||||
softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact |
|
||||
softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f16_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x8000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfc00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7c00;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF16UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
float32_t f32_recip7(float32_t in)
|
||||
{
|
||||
union ui32_f32 uA;
|
||||
float32_t f32_recip7(float32_t in) {
|
||||
union ui32_f32 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f32_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x80000000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xff800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7f800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF32UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 8, 23,
|
||||
softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact |
|
||||
softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f32_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x80000000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xff800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7f800000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF32UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
float64_t f64_recip7(float64_t in)
|
||||
{
|
||||
union ui64_f64 uA;
|
||||
float64_t f64_recip7(float64_t in) {
|
||||
union ui64_f64 uA;
|
||||
|
||||
uA.f = in;
|
||||
unsigned int ret = f64_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch(ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x8000000000000000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfff0000000000000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7ff0000000000000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: //qNaN
|
||||
uA.ui = defaultNaNF64UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 11, 52,
|
||||
softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact |
|
||||
softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
uA.f = in;
|
||||
unsigned int ret = f64_classify(in);
|
||||
bool sub = false;
|
||||
bool round_abnormal = false;
|
||||
switch (ret) {
|
||||
case 0x001: // -inf
|
||||
uA.ui = 0x8000000000000000;
|
||||
break;
|
||||
case 0x080: //+inf
|
||||
uA.ui = 0x0;
|
||||
break;
|
||||
case 0x008: // -0
|
||||
uA.ui = 0xfff0000000000000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x010: // +0
|
||||
uA.ui = 0x7ff0000000000000;
|
||||
softfloat_exceptionFlags |= softfloat_flag_infinite;
|
||||
break;
|
||||
case 0x100: // sNaN
|
||||
softfloat_exceptionFlags |= softfloat_flag_invalid;
|
||||
[[fallthrough]];
|
||||
case 0x200: // qNaN
|
||||
uA.ui = defaultNaNF64UI;
|
||||
break;
|
||||
case 0x004: // -subnormal
|
||||
case 0x020: //+ sub
|
||||
sub = true;
|
||||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
break;
|
||||
}
|
||||
|
||||
return uA.f;
|
||||
}
|
||||
return uA.f;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,14 +1,22 @@
|
|||
// Declarations of the softfloat extension routines (classify, rsqrte7 and
// recip7) for 16-, 32- and 64-bit floating-point operands.
#pragma once

#include <stdint.h>
#include <softfloat_types.h>

// Give the declarations C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif

uint_fast16_t f16_classify(float16_t);
float16_t f16_rsqrte7(float16_t);
float16_t f16_recip7(float16_t);

uint_fast16_t f32_classify(float32_t);
float32_t f32_rsqrte7(float32_t);
float32_t f32_recip7(float32_t);

uint_fast16_t f64_classify(float64_t);
float64_t f64_rsqrte7(float64_t);
float64_t f64_recip7(float64_t);

#ifdef __cplusplus
}
#endif
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,10 +16,10 @@
|
|||
|
||||
// Return the extension of `filepath`, i.e. the text following its last '.'.
// Returns "" when the path contains no '.', when the only '.' is the very
// first character of the path, or when `filepath` is null.
// The returned pointer aliases `filepath` (or a string literal); it is not a copy.
const char* fileExtension(const char* filepath) {
  if (filepath == nullptr)
    return "";
  const char *ext = strrchr(filepath, '.');
  if (ext == nullptr || ext == filepath)
    return "";
  return ext + 1;
}
|
||||
|
||||
void* aligned_malloc(size_t size, size_t alignment) {
|
||||
|
|
|
@ -18,7 +18,12 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
|||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
# Common sources (each file listed once).
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp

# Add V extension sources (only when CONFIGS contains -DEXT_V_ENABLE)
ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
  SRCS += $(SRC_DIR)/execute_v.cpp
endif
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -390,7 +390,7 @@ static const char* op_string(const Instr &instr) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
|
||||
case Opcode::TCU:
|
||||
switch(func3)
|
||||
{
|
||||
|
@ -405,36 +405,31 @@ static const char* op_string(const Instr &instr) {
|
|||
}
|
||||
}
|
||||
|
||||
inline void vec_log(std::ostream &os, const Instr &instr) {
|
||||
if (instr.getVUseMask() & set_func3)
|
||||
os << ", func3:" << instr.getFunc3();
|
||||
if (instr.getVUseMask() & set_func6)
|
||||
os << ", func6:" << instr.getFunc6();
|
||||
if (instr.getVUseMask() & set_imm)
|
||||
os << ", imm:" << instr.getImm();
|
||||
if (instr.getVUseMask() & set_vlswidth)
|
||||
inline void print_vec_attr(std::ostream &os, const Instr &instr) {
|
||||
uint32_t mask = instr.getVattrMask();
|
||||
if (mask & vattr_vlswidth)
|
||||
os << ", width:" << instr.getVlsWidth();
|
||||
if (instr.getVUseMask() & set_vmop)
|
||||
if (mask & vattr_vmop)
|
||||
os << ", mop:" << instr.getVmop();
|
||||
if (instr.getVUseMask() & set_vumop)
|
||||
if (mask & vattr_vumop)
|
||||
os << ", umop:" << instr.getVumop();
|
||||
if (instr.getVUseMask() & set_vnf)
|
||||
if (mask & vattr_vnf)
|
||||
os << ", nf:" << instr.getVnf();
|
||||
if (instr.getVUseMask() & set_vmask)
|
||||
if (mask & vattr_vmask)
|
||||
os << ", vmask:" << instr.getVmask();
|
||||
if (instr.getVUseMask() & set_vs3)
|
||||
if (mask & vattr_vs3)
|
||||
os << ", vs3:" << instr.getVs3();
|
||||
if (instr.getVUseMask() & set_zimm)
|
||||
if (mask & vattr_zimm)
|
||||
os << ", zimm:" << ((instr.hasZimm()) ? "true" : "false");
|
||||
if (instr.getVUseMask() & set_vlmul)
|
||||
if (mask & vattr_vlmul)
|
||||
os << ", lmul:" << instr.getVlmul();
|
||||
if (instr.getVUseMask() & set_vsew)
|
||||
if (mask & vattr_vsew)
|
||||
os << ", sew:" << instr.getVsew();
|
||||
if (instr.getVUseMask() & set_vta)
|
||||
if (mask & vattr_vta)
|
||||
os << ", ta:" << instr.getVta();
|
||||
if (instr.getVUseMask() & set_vma)
|
||||
if (mask & vattr_vma)
|
||||
os << ", ma:" << instr.getVma();
|
||||
if (instr.getVUseMask() & set_vediv)
|
||||
if (mask & vattr_vediv)
|
||||
os << ", ediv:" << instr.getVediv();
|
||||
}
|
||||
|
||||
|
@ -463,8 +458,10 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
|||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getRSrc(0);
|
||||
}
|
||||
// Log vector-specific vtype and vreg info
|
||||
if (instr.isVec()) vec_log(os, instr);
|
||||
// Log vector-specific attributes
|
||||
if (instr.getVattrMask() != 0) {
|
||||
print_vec_attr(os, instr);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
@ -478,6 +475,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
auto func3 = (code >> shift_func3) & mask_func3;
|
||||
auto func6 = (code >> shift_func6) & mask_func6;
|
||||
auto func7 = (code >> shift_func7) & mask_func7;
|
||||
__unused(func6);
|
||||
|
||||
auto rd = (code >> shift_rd) & mask_reg;
|
||||
auto rs1 = (code >> shift_rs1) & mask_reg;
|
||||
|
@ -690,9 +688,18 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
instr->setImm(sext(imm, width_j_imm+1));
|
||||
} break;
|
||||
|
||||
|
||||
case InstType::R4: {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
} break;
|
||||
|
||||
#ifdef EXT_V_ENABLE
|
||||
case InstType::V:
|
||||
instr->setVec(true);
|
||||
switch (op) {
|
||||
case Opcode::VSET: {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
|
@ -738,7 +745,6 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
case Opcode::FL:
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->setVmop((code >> shift_vmop) & 0b11);
|
||||
|
@ -788,14 +794,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
std::abort();
|
||||
}
|
||||
break;
|
||||
case InstType::R4:
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
|
|
|
@ -43,7 +43,9 @@ void Emulator::warp_t::clear(uint64_t startup_addr) {
|
|||
this->uuid = 0;
|
||||
this->fcsr = 0;
|
||||
|
||||
std::srand(50);
|
||||
this->vtype = {0, 0, 0, 0, 0};
|
||||
this->vl = 0;
|
||||
this->VLMAX = 0;
|
||||
|
||||
for (auto& reg_file : this->ireg_file) {
|
||||
for (auto& reg : reg_file) {
|
||||
|
@ -102,6 +104,8 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
|
|||
, scratchpad(std::vector<Word>(32 * 32 * 32768))
|
||||
, csrs_(arch.num_warps())
|
||||
{
|
||||
std::srand(50);
|
||||
|
||||
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
|
||||
csrs_.at(i).resize(arch.num_threads());
|
||||
}
|
||||
|
@ -142,8 +146,7 @@ void Emulator::clear() {
|
|||
warps_[0].tmask.set(0);
|
||||
wspawn_.valid = false;
|
||||
|
||||
for (auto& reg : scratchpad)
|
||||
{
|
||||
for (auto& reg : scratchpad) {
|
||||
reg = 0;
|
||||
}
|
||||
}
|
||||
|
@ -190,6 +193,7 @@ instr_trace_t* Emulator::step() {
|
|||
assert(warp.tmask.any());
|
||||
|
||||
#ifndef NDEBUG
|
||||
// generate unique universal instruction ID
|
||||
uint32_t instr_uuid = warp.uuid++;
|
||||
uint32_t g_wid = core_->id() * arch_.num_warps() + scheduled_warp;
|
||||
uint64_t uuid = (uint64_t(g_wid) << 32) | instr_uuid;
|
||||
|
@ -305,27 +309,26 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
|
|||
#ifdef VM_ENABLE
|
||||
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size);
|
||||
|
||||
try
|
||||
try
|
||||
{
|
||||
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
mmu_.read(data, addr, size, 0);
|
||||
mmu_.read(data, addr, size, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_ENABLE
// Log the new SATP value and store it in the VX_CSR_SATP CSR via set_csr,
// using thread id 0 and warp id 0.
void Emulator::set_satp(uint64_t satp) {
  DPH(3, "set satp 0x" << std::hex << satp << " in emulator module\n");
  set_csr(VX_CSR_SATP,satp,0,0);
}
#endif
|
||||
|
||||
|
@ -337,11 +340,11 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
|
|||
if (type == AddrType::Shared) {
|
||||
core_->local_mem()->read(data, addr, size);
|
||||
} else {
|
||||
try
|
||||
try
|
||||
{
|
||||
mmu_.read(data, addr, size, ACCESS_TYPE::LOAD);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
|
@ -373,16 +376,16 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
|
|||
if (type == AddrType::Shared) {
|
||||
core_->local_mem()->write(data, addr, size);
|
||||
} else {
|
||||
try
|
||||
try
|
||||
{
|
||||
// mmu_.write(data, addr, size, 0);
|
||||
mmu_.write(data, addr, size, ACCESS_TYPE::STORE);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
|
||||
|
@ -450,18 +453,15 @@ void Emulator::cout_flush() {
|
|||
case (addr + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return ((value >> 32) & 0xFFFFFFFF)
|
||||
#endif
|
||||
|
||||
// Return the current value of the mat_size member.
Word Emulator::get_tiles() {
  return mat_size;
}
|
||||
|
||||
// Return the current value of the tc_size member.
Word Emulator::get_tc_size() {
  return tc_size;
}
|
||||
|
||||
// Return the current value of the tc_num member.
Word Emulator::get_tc_num() {
  return tc_num;
}
|
||||
|
||||
|
@ -680,7 +680,7 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
case VX_TC_SIZE:
|
||||
tc_size = value;
|
||||
break;
|
||||
|
||||
|
||||
default: {
|
||||
std::cout << "Error: invalid CSR write addr=0x" << std::hex << addr << ", value=0x" << value << std::dec << std::endl;
|
||||
std::abort();
|
||||
|
@ -688,8 +688,6 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) {
|
||||
return (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, tid, wid) : func3;
|
||||
}
|
||||
|
@ -711,4 +709,4 @@ void Emulator::trigger_ecall() {
|
|||
}
|
||||
// Handle an ebreak by resetting active_warps_.
void Emulator::trigger_ebreak() {
  active_warps_.reset();
}
|
||||
}
|
||||
|
|
|
@ -28,76 +28,6 @@ class Core;
|
|||
class Instr;
|
||||
class instr_trace_t;
|
||||
|
||||
// Instruction-encoding constants: field widths (in bits), the bit offset of
// each field (derived from the widths), and right-aligned field masks.
enum Constants {
  // Field widths, in bits.
  width_opcode= 7,
  width_reg   = 5,
  width_func2 = 2,
  width_func3 = 3,
  width_func6 = 6,
  width_func7 = 7,
  width_mop   = 3,
  width_vmask = 1,
  width_i_imm = 12,
  width_j_imm = 20,
  width_v_zimm = 11,
  width_v_ma  = 1,
  width_v_ta  = 1,
  width_v_sew = 3,
  width_v_lmul = 3,
  width_aq    = 1,
  width_rl    = 1,

  // Bit offsets of the fields inside the 32-bit instruction word.
  shift_opcode= 0,
  shift_rd    = width_opcode,
  shift_func3 = shift_rd + width_reg,
  shift_rs1   = shift_func3 + width_func3,
  shift_rs2   = shift_rs1 + width_reg,
  shift_func2 = shift_rs2 + width_reg,
  shift_func7 = shift_rs2 + width_reg,
  shift_rs3   = shift_func7 + width_func2,
  shift_vmop  = shift_func7 + width_vmask,
  shift_vnf   = shift_vmop + width_mop,
  shift_func6 = shift_func7 + width_vmask,
  shift_vset  = shift_func7 + width_func6,
  // Offsets of the sew/ta/ma fields, counted from bit 0 above a
  // width_v_lmul-bit lmul field.
  shift_v_sew = width_v_lmul,
  shift_v_ta  = shift_v_sew + width_v_sew,
  shift_v_ma  = shift_v_ta + width_v_ta,

  // Masks to apply after shifting a field down to bit 0.
  mask_opcode = (1 << width_opcode) - 1,
  mask_reg    = (1 << width_reg)   - 1,
  mask_func2  = (1 << width_func2) - 1,
  mask_func3  = (1 << width_func3) - 1,
  mask_func6  = (1 << width_func6) - 1,
  mask_func7  = (1 << width_func7) - 1,
  mask_i_imm  = (1 << width_i_imm) - 1,
  mask_j_imm  = (1 << width_j_imm) - 1,
  mask_v_zimm = (1 << width_v_zimm) - 1,
  mask_v_ma   = (1 << width_v_ma) - 1,
  mask_v_ta   = (1 << width_v_ta) - 1,
  mask_v_sew  = (1 << width_v_sew) - 1,
  mask_v_lmul = (1 << width_v_lmul) - 1,
};
|
||||
|
||||
// Vector type state kept per warp. Fields are listed in the order used by
// aggregate initialization: {vill, vma, vta, vsew, vlmul}.
struct vtype {
  uint32_t vill;
  uint32_t vma;
  uint32_t vta;
  uint32_t vsew;  // element width in bits (the load/store code divides it by 8 to get bytes)
  uint32_t vlmul;
};
|
||||
|
||||
union reg_data_t {
|
||||
Word u;
|
||||
WordI i;
|
||||
WordF f;
|
||||
float f32;
|
||||
double f64;
|
||||
uint32_t u32;
|
||||
uint64_t u64;
|
||||
int32_t i32;
|
||||
int64_t i64;
|
||||
};
|
||||
|
||||
class Emulator {
|
||||
public:
|
||||
Emulator(const Arch &arch,
|
||||
|
@ -126,11 +56,11 @@ public:
|
|||
bool wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
int get_exitcode() const;
|
||||
|
||||
|
||||
Word get_tiles();
|
||||
Word get_tc_size();
|
||||
Word get_tc_num();
|
||||
|
||||
|
||||
void dcache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
@ -151,6 +81,26 @@ private:
|
|||
bool fallthrough;
|
||||
};
|
||||
|
||||
// Vector type state kept per warp. Fields are listed in the order used by
// aggregate initialization: {vill, vma, vta, vsew, vlmul}.
struct vtype_t {
  uint32_t vill;
  uint32_t vma;
  uint32_t vta;
  uint32_t vsew;
  uint32_t vlmul;
};
|
||||
|
||||
union reg_data_t {
|
||||
Word u;
|
||||
WordI i;
|
||||
WordF f;
|
||||
float f32;
|
||||
double f64;
|
||||
uint32_t u32;
|
||||
uint64_t u64;
|
||||
int32_t i32;
|
||||
int64_t i64;
|
||||
};
|
||||
|
||||
struct warp_t {
|
||||
warp_t(const Arch& arch);
|
||||
void clear(uint64_t startup_addr);
|
||||
|
@ -162,11 +112,10 @@ private:
|
|||
std::vector<std::vector<Byte>> vreg_file;
|
||||
std::stack<ipdom_entry_t> ipdom_stack;
|
||||
Byte fcsr;
|
||||
vtype_t vtype;
|
||||
uint32_t vl;
|
||||
Word VLMAX;
|
||||
uint32_t uuid;
|
||||
|
||||
struct vtype vtype;
|
||||
uint32_t vl;
|
||||
Word VLMAX;
|
||||
};
|
||||
|
||||
struct wspawn_t {
|
||||
|
@ -179,11 +128,11 @@ private:
|
|||
|
||||
void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace);
|
||||
|
||||
void executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata);
|
||||
|
||||
#ifdef EXT_V_ENABLE
|
||||
void loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
|
||||
|
||||
void storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
|
||||
void executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata);
|
||||
#endif
|
||||
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
|
@ -203,9 +152,10 @@ private:
|
|||
|
||||
void update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid);
|
||||
|
||||
void trigger_ecall(); // Re-added for riscv-vector test functionality
|
||||
|
||||
void trigger_ebreak(); // Re-added for riscv-vector test functionality
|
||||
// temporarily added for riscv-vector tests
|
||||
// TODO: remove once ecall/ebreak are supported
|
||||
void trigger_ecall();
|
||||
void trigger_ebreak();
|
||||
|
||||
const Arch& arch_;
|
||||
const DCRS& dcrs_;
|
||||
|
|
|
@ -677,7 +677,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t read_data = 0;
|
||||
this->dcache_read(&read_data, mem_addr, data_bytes);
|
||||
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
|
||||
|
@ -703,12 +703,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
rddata[t].u64 = read_data;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
rd_write = true;
|
||||
} else {
|
||||
loadVector(instr, wid, rsdata);
|
||||
#ifdef EXT_V_ENABLE
|
||||
this->loadVector(instr, wid, rsdata);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -736,14 +738,16 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
this->dcache_write(&write_data, mem_addr, data_bytes);
|
||||
this->dcache_write(&write_data, mem_addr, data_bytes);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
storeVector(instr, wid, rsdata);
|
||||
#ifdef EXT_V_ENABLE
|
||||
this->storeVector(instr, wid, rsdata);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1595,6 +1599,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} break;
|
||||
#ifdef EXT_V_ENABLE
|
||||
case Opcode::VSET: {
|
||||
auto func6 = instr.getFunc6();
|
||||
if ((func3 == 0x7) || (func3 == 0x2 && func6 == 16) || (func3 == 0x1 && func6 == 16)) {
|
||||
|
@ -1602,6 +1607,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
}
|
||||
executeVector(instr, wid, rsdata, rddata);
|
||||
} break;
|
||||
#endif
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -1132,7 +1132,7 @@ bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint
|
|||
auto& mask = vreg_file.at(maskVreg);
|
||||
uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
|
||||
uint8_t value = (emask >> (byteI % 8)) & 0x1;
|
||||
DP(1, "Masking enabled: " << +!vmask << " mask element: " << +value);
|
||||
DP(4, "Masking enabled: " << +!vmask << " mask element: " << +value);
|
||||
return !vmask && value == 0;
|
||||
}
|
||||
|
||||
|
@ -1164,14 +1164,14 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
}
|
||||
for (uint32_t i = 0; i < vl * nfields; i++) {
|
||||
if (isMasked(vreg_file, 0, i / nfields, vmask)) continue;
|
||||
|
||||
|
||||
uint32_t nfields_strided = strided ? nfields : 1;
|
||||
Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
|
||||
Word mem_data = 0;
|
||||
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
|
||||
DP(1, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DP(4, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
|
||||
DP(1, "Previous data: " << +result);
|
||||
DP(4, "Previous data: " << +result);
|
||||
result = (DT) mem_data;
|
||||
}
|
||||
}
|
||||
|
@ -1225,13 +1225,13 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
|
|||
std::cout << "Unsupported iSew: " << iSew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
|
||||
Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT);
|
||||
Word mem_data = 0;
|
||||
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
|
||||
DP(1, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DP(4, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
|
||||
DP(1, "Previous data: " << +result);
|
||||
DP(4, "Previous data: " << +result);
|
||||
result = (DT) mem_data;
|
||||
}
|
||||
}
|
||||
|
@ -1256,104 +1256,6 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
|
|||
}
|
||||
}
|
||||
|
||||
void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
|
||||
auto &warp = warps_.at(wid);
|
||||
auto vmask = instr.getVmask();
|
||||
auto rdest = instr.getRDest();
|
||||
auto mop = instr.getVmop();
|
||||
switch (mop) {
|
||||
case 0b00: { // unit-stride
|
||||
auto lumop = instr.getVumop();
|
||||
switch (lumop) {
|
||||
case 0b10000: // vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride
|
||||
// vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v
|
||||
// vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v
|
||||
// vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v
|
||||
// vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v
|
||||
// vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v
|
||||
// vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v
|
||||
// vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v
|
||||
case 0b0000: { // vle8.v, vle16.v, vle32.v, vle64.v
|
||||
// vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v
|
||||
// vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v
|
||||
// vlseg4e8.v, vlseg4e16.v, vlseg4e32.v, vlseg4e64.v
|
||||
// vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v
|
||||
// vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v
|
||||
// vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v
|
||||
// vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v
|
||||
uint32_t nreg = instr.getVnf() + 1;
|
||||
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
|
||||
std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
DP(1, "Whole vector register load with nreg: " << nreg);
|
||||
uint32_t vl = nreg * VLEN / instr.getVsew();
|
||||
WordI stride = instr.getVsew() / 8;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1011: { // vlm.v
|
||||
if (warp.vtype.vsew != 8) {
|
||||
std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Load vector - unsupported lumop: " << lumop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0b10: { // strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v
|
||||
// vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v
|
||||
// vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v
|
||||
// vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v
|
||||
// vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v
|
||||
// vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v
|
||||
// vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v
|
||||
// vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v
|
||||
auto rsrc1 = instr.getRSrc(1);
|
||||
auto rdest = instr.getRDest();
|
||||
WordI stride = warp.ireg_file.at(0).at(rsrc1);
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v
|
||||
// vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v
|
||||
// vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v
|
||||
// vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v
|
||||
// vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v
|
||||
// vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v
|
||||
// vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v
|
||||
// vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v
|
||||
case 0b11: { // indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v
|
||||
// vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v
|
||||
// vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v
|
||||
// vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v
|
||||
// vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v
|
||||
// vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v
|
||||
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
|
||||
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Load vector - unsupported mop: " << mop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DT>
|
||||
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, std::vector<reg_data_t[3]> &rsdata, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
uint32_t vsew = sizeof(DT) * 8;
|
||||
|
@ -1364,7 +1266,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
|
|||
uint32_t nfields_strided = strided ? nfields : 1;
|
||||
Word mem_addr = rsdata[0][0].i + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
|
||||
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
|
||||
DP(1, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DP(4, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
|
||||
}
|
||||
}
|
||||
|
@ -1417,7 +1319,7 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
|
||||
Word mem_addr = rsdata[0][0].i + offset + (i % nfields) * sizeof(DT);
|
||||
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
|
||||
DP(1, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
DP(4, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
|
||||
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
|
||||
}
|
||||
}
|
||||
|
@ -1442,97 +1344,16 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
}
|
||||
}
|
||||
|
||||
void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
|
||||
auto &warp = warps_.at(wid);
|
||||
auto vmask = instr.getVmask();
|
||||
auto mop = instr.getVmop();
|
||||
switch (mop) {
|
||||
case 0b00: { // unit-stride
|
||||
auto vs3 = instr.getRSrc(1);
|
||||
auto sumop = instr.getVumop();
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
switch (sumop) {
|
||||
case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v
|
||||
uint32_t nreg = instr.getVnf() + 1;
|
||||
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
|
||||
std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
DP(1, "Whole vector register store with nreg: " << nreg);
|
||||
uint32_t vl = nreg * VLEN / 8;
|
||||
vector_op_vix_store<uint8_t>(warp.vreg_file, this, rsdata, vs3, vl, false, stride, 1, 0, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1011: { // vsm.v
|
||||
if (warp.vtype.vsew != 8) {
|
||||
std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Store vector - unsupported sumop: " << sumop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v
|
||||
// vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v
|
||||
// vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v
|
||||
// vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v
|
||||
// vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v
|
||||
// vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v
|
||||
// vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v
|
||||
// vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v
|
||||
auto rsrc1 = instr.getRSrc(1);
|
||||
auto vs3 = instr.getRSrc(2);
|
||||
WordI stride = warp.ireg_file.at(0).at(rsrc1);
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v
|
||||
// vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v
|
||||
// vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v
|
||||
// vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v
|
||||
// vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v
|
||||
// vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v
|
||||
// vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v
|
||||
// vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v
|
||||
case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v
|
||||
// vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v
|
||||
// vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v
|
||||
// vsoxseg4ei8.v, vsoxseg4ei16.v, vsoxseg4ei32.v, vsoxseg4ei64.v
|
||||
// vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v
|
||||
// vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v
|
||||
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
|
||||
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Store vector - unsupported mop: " << mop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT>
|
||||
void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask)
|
||||
{
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
if (isMasked(vreg_file, 0, i, vmask)) continue;
|
||||
|
||||
|
||||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
DT third = getVregData<DT>(vreg_file, rdest, i);
|
||||
DT result = OP<DT, DT>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1557,11 +1378,11 @@ void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_
|
|||
template <template <typename DT1, typename DT2> class OP, typename DT>
|
||||
void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl)
|
||||
{
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
bool third = !isMasked(vreg_file, 0, i, false);
|
||||
DT result = OP<DT, DT>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1586,11 +1407,11 @@ void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, u
|
|||
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
|
||||
void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask)
|
||||
{
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
|
||||
bool result = OP<DT, DTR>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
if (result) {
|
||||
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
|
||||
} else {
|
||||
|
@ -1621,7 +1442,7 @@ void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
|
|||
{
|
||||
for (uint32_t i = 0; i < vl; i++) {
|
||||
DT result = isMasked(vreg_file, 0, i, vmask) ? getVregData<DT>(vreg_file, rsrc0, i) : first;
|
||||
DP(1, "Merge - Choosing result: " << +result);
|
||||
DP(4, "Merge - Choosing result: " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1673,7 +1494,7 @@ void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
|
|||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
DTR third = getVregData<DTR>(vreg_file, rdest, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, third);
|
||||
DP(1, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1716,7 +1537,7 @@ void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
|
|||
|
||||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
|
||||
DP(1, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1744,7 +1565,7 @@ void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uin
|
|||
|
||||
DT second = getVregData<DTR>(vreg_file, rsrc0, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
|
||||
DP(1, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
|
||||
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1854,7 +1675,7 @@ void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uin
|
|||
|
||||
DT second = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
bool result = OP<DT, bool>::apply(first, second, 0);
|
||||
DP(1, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
if (result) {
|
||||
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
|
||||
} else {
|
||||
|
@ -1889,7 +1710,7 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
|
|||
// If scalar set is set this means we have a v(f)slide1up or v(f)slide1down instruction,
|
||||
// so first is our scalar value and we need to overwrite it with 1 for later computations
|
||||
if (scalar && vl && !isMasked(vreg_file, 0, scalarPos, vmask)) {
|
||||
DP(1, "Slide - Moving scalar value " << +first << " to position " << +scalarPos);
|
||||
DP(4, "Slide - Moving scalar value " << +first << " to position " << +scalarPos);
|
||||
getVregData<DT>(vreg_file, rdest, scalarPos) = first;
|
||||
}
|
||||
first = scalar ? 1 : first;
|
||||
|
@ -1899,7 +1720,7 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
|
|||
|
||||
__uint128_t iSrc = slideDown ? (__uint128_t)i + (__uint128_t)first : (__uint128_t)i - (__uint128_t)first; // prevent overflows/underflows
|
||||
DT value = (!slideDown || iSrc < VLMAX) ? getVregData<DT>(vreg_file, rsrc0, iSrc) : 0;
|
||||
DP(1, "Slide - Moving value " << +value << " from position " << (uint64_t)iSrc << " to position " << +i);
|
||||
DP(4, "Slide - Moving value " << +value << " from position " << (uint64_t)iSrc << " to position " << +i);
|
||||
getVregData<DT>(vreg_file, rdest, i) = value;
|
||||
}
|
||||
}
|
||||
|
@ -1928,7 +1749,7 @@ void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file,
|
|||
if (isMasked(vreg_file, 0, i, vmask)) continue;
|
||||
|
||||
DT value = first < VLMAX ? getVregData<DT>(vreg_file, rsrc0, first) : 0;
|
||||
DP(1, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
|
||||
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
|
||||
getVregData<DT>(vreg_file, rdest, i) = value;
|
||||
}
|
||||
}
|
||||
|
@ -1960,7 +1781,7 @@ void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uin
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DT third = getVregData<DT>(vreg_file, rdest, i);
|
||||
DT result = OP<DT, DT>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -1990,7 +1811,7 @@ void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
bool third = !isMasked(vreg_file, 0, i, false);
|
||||
DT result = OP<DT, DT>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2020,7 +1841,7 @@ void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
|
||||
bool result = OP<DT, DTR>::apply(first, second, third);
|
||||
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
if (result) {
|
||||
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
|
||||
} else {
|
||||
|
@ -2052,7 +1873,7 @@ void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
for (uint32_t i = 0; i < vl; i++) {
|
||||
uint32_t rsrc = isMasked(vreg_file, 0, i, vmask) ? rsrc1 : rsrc0;
|
||||
DT result = getVregData<DT>(vreg_file, rsrc, i);
|
||||
DP(1, "Merge - Choosing result: " << +result);
|
||||
DP(4, "Merge - Choosing result: " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2082,7 +1903,7 @@ void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
|
|||
|
||||
uint32_t first = ei16 ? getVregData<uint16_t>(vreg_file, rsrc0, i) : getVregData<DT>(vreg_file, rsrc0, i);
|
||||
DT value = first < VLMAX ? getVregData<DT>(vreg_file, rsrc1, first) : 0;
|
||||
DP(1, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
|
||||
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
|
||||
getVregData<DT>(vreg_file, rdest, i) = value;
|
||||
}
|
||||
}
|
||||
|
@ -2114,7 +1935,7 @@ void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DTR third = getVregData<DTR>(vreg_file, rdest, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, third);
|
||||
DP(1, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2144,7 +1965,7 @@ void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
|
||||
DTR third = getVregData<DTR>(vreg_file, rdest, i);
|
||||
DTR result = OP<DTR, DTR>::apply(first, second, third);
|
||||
DP(1, "Widening wv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, "Widening wv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2174,7 +1995,7 @@ void vector_op_vv_wfv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
|
||||
DTR third = getVregData<DTR>(vreg_file, rdest, i);
|
||||
DTR result = OP<DTR, DTR>::apply(rv_ftod(first), second, third);
|
||||
DP(1, "Widening wfv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
DP(4, "Widening wfv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2199,7 +2020,7 @@ void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
|
|||
DTR first = getVregData<DTR>(vreg_file, rsrc0, i);
|
||||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
|
||||
DP(1, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2228,7 +2049,7 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
DT first = getVregData<DTR>(vreg_file, rsrc0, i);
|
||||
DT second = getVregData<DTR>(vreg_file, rsrc1, i);
|
||||
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
|
||||
DP(1, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
|
||||
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
|
||||
getVregData<DTR>(vreg_file, rdest, i) = result;
|
||||
}
|
||||
}
|
||||
|
@ -2280,9 +2101,9 @@ void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
DT first = getVregData<DT>(vreg_file, rdest, 0);
|
||||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DT result = OP<DT, DT>::apply(first, second, 0);
|
||||
DP(1, "Reduction " << (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Reduction " << (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
getVregData<DT>(vreg_file, rdest, 0) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
|
@ -2316,9 +2137,9 @@ void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DTR second_w = std::is_signed<DT>() ? sext((DTR) second, sizeof(DT) * 8) : zext((DTR) second, sizeof(DT) * 8);
|
||||
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
|
||||
DP(1, "Widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
|
||||
DP(4, "Widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, 0) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
|
@ -2350,9 +2171,9 @@ void vector_op_vv_red_wf(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
|
|||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DTR second_w = rv_ftod(second);
|
||||
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
|
||||
DP(1, "Float widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
|
||||
DP(4, "Float widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
|
||||
getVregData<DTR>(vreg_file, rdest, 0) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
|
@ -2372,9 +2193,9 @@ void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, ui
|
|||
for (uint32_t i = 0; i < vl; i++) {
|
||||
if (isMasked(vreg_file, 0, i, vmask)) continue;
|
||||
|
||||
DP(1, "Element Index = " << +i);
|
||||
DP(4, "Element Index = " << +i);
|
||||
getVregData<DT>(vreg_file, rdest, i) = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask)
|
||||
|
@ -2402,7 +2223,7 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
|
|||
DT first = getVregData<DT>(vreg_file, rsrc0, i);
|
||||
DT second = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
bool result = OP<DT, bool>::apply(first, second, 0);
|
||||
DP(1, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
if (result) {
|
||||
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
|
||||
} else {
|
||||
|
@ -2437,7 +2258,7 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
|
|||
uint8_t secondMask = getVregData<uint8_t>(vreg_file, rsrc1, i / 8);
|
||||
bool second = (secondMask >> (i % 8)) & 0x1;
|
||||
bool result = OP<uint8_t, uint8_t>::apply(first, second, 0) & 0x1;
|
||||
DP(1, "Compare mask bits " << (OP<uint8_t, uint8_t>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
DP(4, "Compare mask bits " << (OP<uint8_t, uint8_t>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
|
||||
if (result) {
|
||||
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
|
||||
} else {
|
||||
|
@ -2456,7 +2277,7 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
|
|||
if (isMasked(vreg_file, rsrc0, i, 0)) continue;
|
||||
|
||||
DT value = getVregData<DT>(vreg_file, rsrc1, i);
|
||||
DP(1, "Compression - Moving value " << +value << " from position " << i << " to position " << currPos);
|
||||
DP(4, "Compression - Moving value " << +value << " from position " << i << " to position " << currPos);
|
||||
getVregData<DT>(vreg_file, rdest, currPos) = value;
|
||||
currPos++;
|
||||
}
|
||||
|
@ -2479,6 +2300,185 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
|
|||
}
|
||||
}
|
||||
|
||||
void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
|
||||
auto &warp = warps_.at(wid);
|
||||
auto vmask = instr.getVmask();
|
||||
auto rdest = instr.getRDest();
|
||||
auto mop = instr.getVmop();
|
||||
switch (mop) {
|
||||
case 0b00: { // unit-stride
|
||||
auto lumop = instr.getVumop();
|
||||
switch (lumop) {
|
||||
case 0b10000: // vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride
|
||||
// vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v
|
||||
// vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v
|
||||
// vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v
|
||||
// vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v
|
||||
// vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v
|
||||
// vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v
|
||||
// vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v
|
||||
case 0b0000: { // vle8.v, vle16.v, vle32.v, vle64.v
|
||||
// vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v
|
||||
// vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v
|
||||
// vlseg4e8.v, vlseg4e16.v, vlseg4e32.v, vlseg4e64.v
|
||||
// vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v
|
||||
// vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v
|
||||
// vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v
|
||||
// vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v
|
||||
uint32_t nreg = instr.getVnf() + 1;
|
||||
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
|
||||
std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
DP(4, "Whole vector register load with nreg: " << nreg);
|
||||
uint32_t vl = nreg * VLEN / instr.getVsew();
|
||||
WordI stride = instr.getVsew() / 8;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1011: { // vlm.v
|
||||
if (warp.vtype.vsew != 8) {
|
||||
std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Load vector - unsupported lumop: " << lumop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0b10: { // strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v
|
||||
// vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v
|
||||
// vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v
|
||||
// vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v
|
||||
// vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v
|
||||
// vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v
|
||||
// vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v
|
||||
// vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v
|
||||
auto rsrc1 = instr.getRSrc(1);
|
||||
auto rdest = instr.getRDest();
|
||||
WordI stride = warp.ireg_file.at(0).at(rsrc1);
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v
|
||||
// vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v
|
||||
// vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v
|
||||
// vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v
|
||||
// vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v
|
||||
// vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v
|
||||
// vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v
|
||||
// vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v
|
||||
case 0b11: { // indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v
|
||||
// vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v
|
||||
// vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v
|
||||
// vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v
|
||||
// vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v
|
||||
// vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v
|
||||
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
|
||||
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Load vector - unsupported mop: " << mop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
|
||||
auto &warp = warps_.at(wid);
|
||||
auto vmask = instr.getVmask();
|
||||
auto mop = instr.getVmop();
|
||||
switch (mop) {
|
||||
case 0b00: { // unit-stride
|
||||
auto vs3 = instr.getRSrc(1);
|
||||
auto sumop = instr.getVumop();
|
||||
WordI stride = warp.vtype.vsew / 8;
|
||||
switch (sumop) {
|
||||
case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v
|
||||
uint32_t nreg = instr.getVnf() + 1;
|
||||
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
|
||||
std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
DP(4, "Whole vector register store with nreg: " << nreg);
|
||||
uint32_t vl = nreg * VLEN / 8;
|
||||
vector_op_vix_store<uint8_t>(warp.vreg_file, this, rsdata, vs3, vl, false, stride, 1, 0, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b1011: { // vsm.v
|
||||
if (warp.vtype.vsew != 8) {
|
||||
std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Store vector - unsupported sumop: " << sumop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v
|
||||
// vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v
|
||||
// vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v
|
||||
// vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v
|
||||
// vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v
|
||||
// vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v
|
||||
// vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v
|
||||
// vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v
|
||||
auto rsrc1 = instr.getRSrc(1);
|
||||
auto vs3 = instr.getRSrc(2);
|
||||
WordI stride = warp.ireg_file.at(0).at(rsrc1);
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v
|
||||
// vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v
|
||||
// vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v
|
||||
// vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v
|
||||
// vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v
|
||||
// vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v
|
||||
// vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v
|
||||
// vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v
|
||||
case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v
|
||||
// vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v
|
||||
// vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v
|
||||
// vsoxseg4ei8.v, vsoxseg4ei16.v, vsoxseg4ei32.v, vsoxseg4ei64.v
|
||||
// vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v
|
||||
// vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v
|
||||
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
|
||||
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Store vector - unsupported mop: " << mop << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata) {
|
||||
auto &warp = warps_.at(wid);
|
||||
auto func3 = instr.getFunc3();
|
||||
|
@ -2491,10 +2491,10 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
auto uimmsrc = (Word)instr.getImm();
|
||||
auto vmask = instr.getVmask();
|
||||
auto num_threads = arch_.num_threads();
|
||||
|
||||
|
||||
switch (func3) {
|
||||
case 0: { // vector - vector
|
||||
switch (func6) {
|
||||
switch (func6) {
|
||||
case 0: { // vadd.vv
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t)) continue;
|
||||
|
@ -2769,7 +2769,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
default:
|
||||
std::cout << "Unrecognised vector - vector instruction func3: " << func3 << " func6: " << func6 << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case 1: { // float vector - vector
|
||||
switch (func6) {
|
||||
|
@ -2839,7 +2839,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
if (!warp.tmask.test(t)) continue;
|
||||
auto &dest = rddata[t].u64;
|
||||
vector_op_scalar(dest, warp.vreg_file, rsrc0, rsrc1, warp.vtype.vsew);
|
||||
DP(1, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
|
||||
DP(4, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
|
||||
}
|
||||
} break;
|
||||
case 18: {
|
||||
|
@ -3107,7 +3107,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
if (!warp.tmask.test(t)) continue;
|
||||
auto &dest = rddata[t].i;
|
||||
vector_op_scalar(dest, warp.vreg_file, rsrc0, rsrc1, warp.vtype.vsew);
|
||||
DP(1, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
|
||||
DP(4, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
|
||||
}
|
||||
} break;
|
||||
case 18: { // vzext.vf8, vsext.vf8, vzext.vf4, vsext.vf4, vzext.vf2, vsext.vf2
|
||||
|
@ -4438,7 +4438,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
uint32_t vsew = instr.getVsew();
|
||||
uint32_t vlmul = instr.getVlmul();
|
||||
|
||||
if(!instr.hasZimm()){ // vsetvl
|
||||
if (!instr.hasZimm()) { // vsetvl
|
||||
uint32_t zimm = rsdata[0][1].u;
|
||||
vlmul = zimm & mask_v_lmul;
|
||||
vsewO = (zimm >> shift_v_sew) & mask_v_sew;
|
||||
|
@ -4459,7 +4459,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
s0 = rsdata[0][0].u;
|
||||
}
|
||||
|
||||
DP(1, "Vset(i)vl(i) - vill: " << +warp.vtype.vill << " vma: " << vma << " vta: " << vta << " lmul: " << vlmul << " sew: " << vsew << " s0: " << s0 << " VLMAX: " << warp.VLMAX);
|
||||
DP(4, "Vset(i)vl(i) - vill: " << +warp.vtype.vill << " vma: " << vma << " vta: " << vta << " lmul: " << vlmul << " sew: " << vsew << " s0: " << s0 << " VLMAX: " << warp.VLMAX);
|
||||
warp.vl = std::min(s0, warp.VLMAX);
|
||||
|
||||
if (warp.vtype.vill) {
|
||||
|
@ -4490,4 +4490,4 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
|
|||
std::cout << "Unrecognised vector instruction func3: " << func3 << " func6: " << func6 << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
}
|
195
sim/simx/instr.h
195
sim/simx/instr.h
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,8 +17,8 @@
|
|||
|
||||
namespace vortex {
|
||||
|
||||
enum class Opcode {
|
||||
NONE = 0,
|
||||
enum class Opcode {
|
||||
NONE = 0,
|
||||
R = 0x33,
|
||||
L = 0x3,
|
||||
I = 0x13,
|
||||
|
@ -38,11 +38,11 @@ enum class Opcode {
|
|||
FMADD = 0x43,
|
||||
FMSUB = 0x47,
|
||||
FMNMSUB = 0x4b,
|
||||
FMNMADD = 0x4f,
|
||||
FMNMADD = 0x4f,
|
||||
// RV64 Standard Extension
|
||||
R_W = 0x3b,
|
||||
I_W = 0x1b,
|
||||
// Vector Extension
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// Custom Extensions
|
||||
EXT1 = 0x0b,
|
||||
|
@ -52,37 +52,84 @@ enum class Opcode {
|
|||
};
|
||||
|
||||
// Tag identifying the encoding layout of an instruction.
// NOTE(review): the names presumably follow the RISC-V base formats
// (R/I/S/B/U/J) plus a vector format (V) and the four-register format (R4);
// confirm against the decoder that assigns them.
// NOTE(review): R, I, S, B and U are listed twice in this span; this looks
// like both sides of a whitespace-only change being shown together. Only one
// set can exist in the real header, since duplicate enumerators do not compile.
enum class InstType {
|
||||
R,
|
||||
I,
|
||||
S,
|
||||
B,
|
||||
U,
|
||||
R,
|
||||
I,
|
||||
S,
|
||||
B,
|
||||
U,
|
||||
J,
|
||||
V,
|
||||
R4
|
||||
};
|
||||
|
||||
enum set_vuse_mask {
|
||||
set_func3 = (1 << 0),
|
||||
set_func6 = (1 << 1),
|
||||
set_imm = (1 << 2),
|
||||
set_vlswidth = (1 << 3),
|
||||
set_vmop = (1 << 4),
|
||||
set_vumop = (1 << 5),
|
||||
set_vnf = (1 << 6),
|
||||
set_vmask = (1 << 7),
|
||||
set_vs3 = (1 << 8),
|
||||
set_zimm = (1 << 9),
|
||||
set_vlmul = (1 << 10),
|
||||
set_vsew = (1 << 11),
|
||||
set_vta = (1 << 12),
|
||||
set_vma = (1 << 13),
|
||||
set_vediv = (1 << 14)
|
||||
// Bit-field geometry used when picking fields out of an instruction word.
// Three families of constants are defined:
//   width_*  - size of a field in bits
//   shift_*  - bit position of a field's least-significant bit
//   mask_*   - (1 << width) - 1, applied after shifting the field down to bit 0
// Every shift is derived from the widths of the fields below it, so the layout
// is defined in one place.
enum DecodeConstants {
|
||||
// ---- field widths (bits) ----
width_opcode= 7,
|
||||
width_reg = 5,
|
||||
width_func2 = 2,
|
||||
width_func3 = 3,
|
||||
width_func6 = 6,
|
||||
width_func7 = 7,
|
||||
width_mop = 3,
|
||||
width_vmask = 1,
|
||||
width_i_imm = 12,
|
||||
width_j_imm = 20,
|
||||
width_v_zimm = 11,
|
||||
width_v_ma = 1,
|
||||
width_v_ta = 1,
|
||||
width_v_sew = 3,
|
||||
width_v_lmul = 3,
|
||||
width_aq = 1,
|
||||
width_rl = 1,
|
||||
|
||||
// ---- field positions within the instruction word ----
shift_opcode= 0,
|
||||
shift_rd = width_opcode, // bit 7
|
||||
shift_func3 = shift_rd + width_reg, // bit 12
|
||||
shift_rs1 = shift_func3 + width_func3, // bit 15
|
||||
shift_rs2 = shift_rs1 + width_reg, // bit 20
|
||||
// func2 and func7 both start directly above rs2 (bit 25).
shift_func2 = shift_rs2 + width_reg,
|
||||
shift_func7 = shift_rs2 + width_reg,
|
||||
shift_rs3 = shift_func7 + width_func2, // bit 27, directly above func2
|
||||
// vmop and func6 both sit one bit (the vmask bit) above shift_func7, i.e. bit 26.
shift_vmop = shift_func7 + width_vmask,
|
||||
shift_vnf = shift_vmop + width_mop, // bit 29
|
||||
shift_func6 = shift_func7 + width_vmask,
|
||||
shift_vset = shift_func7 + width_func6, // bit 31
|
||||
// Positions inside the vset* zimm value rather than the instruction word:
// lmul occupies the lowest bits, followed by sew, ta and ma.
shift_v_sew = width_v_lmul, // bit 3
|
||||
shift_v_ta = shift_v_sew + width_v_sew, // bit 6
|
||||
shift_v_ma = shift_v_ta + width_v_ta, // bit 7
|
||||
|
||||
// ---- low-bit masks, one per width above that needs extracting ----
mask_opcode = (1 << width_opcode) - 1,
|
||||
mask_reg = (1 << width_reg) - 1,
|
||||
mask_func2 = (1 << width_func2) - 1,
|
||||
mask_func3 = (1 << width_func3) - 1,
|
||||
mask_func6 = (1 << width_func6) - 1,
|
||||
mask_func7 = (1 << width_func7) - 1,
|
||||
mask_i_imm = (1 << width_i_imm) - 1,
|
||||
mask_j_imm = (1 << width_j_imm) - 1,
|
||||
mask_v_zimm = (1 << width_v_zimm) - 1,
|
||||
mask_v_ma = (1 << width_v_ma) - 1,
|
||||
mask_v_ta = (1 << width_v_ta) - 1,
|
||||
mask_v_sew = (1 << width_v_sew) - 1,
|
||||
mask_v_lmul = (1 << width_v_lmul) - 1,
|
||||
};
|
||||
|
||||
// One-hot flags recording which vector attributes have been set on an Instr.
// Each Instr::setV*() / setZimm() setter ORs the matching flag into
// vattr_mask_, and getVattrMask() returns the accumulated value.
// The flags start at bit 3; bits 0-2 are not assigned by this enum.
enum VectorAttrMask {
|
||||
vattr_vlswidth = (1 << 3),
|
||||
vattr_vmop = (1 << 4),
|
||||
vattr_vumop = (1 << 5),
|
||||
vattr_vnf = (1 << 6),
|
||||
vattr_vmask = (1 << 7),
|
||||
vattr_vs3 = (1 << 8),
|
||||
vattr_zimm = (1 << 9),
|
||||
vattr_vlmul = (1 << 10),
|
||||
vattr_vsew = (1 << 11),
|
||||
vattr_vta = (1 << 12),
|
||||
vattr_vma = (1 << 13),
|
||||
vattr_vediv = (1 << 14)
|
||||
};
|
||||
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
Instr()
|
||||
: opcode_(Opcode::NONE)
|
||||
, num_rsrcs_(0)
|
||||
, has_imm_(false)
|
||||
|
@ -105,60 +152,72 @@ public:
|
|||
, vta_(0)
|
||||
, vma_(0)
|
||||
, vediv_(0)
|
||||
, _vusemask(0)
|
||||
, _is_vec(false) {
|
||||
, vattr_mask_(0) {
|
||||
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = RegType::None;
|
||||
rsrc_[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(uint32_t destReg, RegType type) {
|
||||
rdest_type_ = type;
|
||||
rdest_ = destReg;
|
||||
void setOpcode(Opcode opcode) {
|
||||
opcode_ = opcode;
|
||||
}
|
||||
void addSrcReg(uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[num_rsrcs_] = type;
|
||||
rsrc_[num_rsrcs_] = srcReg;
|
||||
|
||||
void setDestReg(uint32_t destReg, RegType type) {
|
||||
rdest_type_ = type;
|
||||
rdest_ = destReg;
|
||||
}
|
||||
|
||||
void addSrcReg(uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[num_rsrcs_] = type;
|
||||
rsrc_[num_rsrcs_] = srcReg;
|
||||
++num_rsrcs_;
|
||||
}
|
||||
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[index] = type;
|
||||
rsrc_[index] = srcReg;
|
||||
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
|
||||
|
||||
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[index] = type;
|
||||
rsrc_[index] = srcReg;
|
||||
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
|
||||
}
|
||||
|
||||
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
|
||||
|
||||
void setFunc2(uint32_t func2) { func2_ = func2; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; _vusemask |= set_func3; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; _vusemask |= set_func6; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; }
|
||||
void setFunc7(uint32_t func7) { func7_ = func7; }
|
||||
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; _vusemask |= set_imm; }
|
||||
void setVlsWidth(uint32_t width) { vlsWidth_ = width; _vusemask |= set_vlswidth; }
|
||||
void setVmop(uint32_t mop) { vMop_ = mop; _vusemask |= set_vmop; }
|
||||
void setVumop(uint32_t umop) { vUmop_ = umop; _vusemask |= set_vumop; }
|
||||
void setVnf(uint32_t nf) { vNf_ = nf; _vusemask |= set_vnf; }
|
||||
void setVmask(uint32_t mask) { vmask_ = mask; _vusemask |= set_vmask; }
|
||||
void setVs3(uint32_t vs) { vs3_ = vs; _vusemask |= set_vs3; }
|
||||
void setZimm(bool has_zimm) { has_zimm_ = has_zimm; _vusemask |= set_zimm; }
|
||||
void setVlmul(uint32_t lmul) { vlmul_ = lmul; _vusemask |= set_vlmul; }
|
||||
void setVsew(uint32_t sew) { vsew_ = sew; _vusemask |= set_vsew; }
|
||||
void setVta(uint32_t vta) { vta_ = vta; _vusemask |= set_vta; }
|
||||
void setVma(uint32_t vma) { vma_ = vma; _vusemask |= set_vma; }
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; _vusemask |= set_vediv; }
|
||||
void setVec(bool is_vec) { _is_vec = is_vec; }
|
||||
|
||||
// Attributes for Vector instructions
|
||||
void setVlsWidth(uint32_t width) { vlsWidth_ = width; vattr_mask_ |= vattr_vlswidth; }
|
||||
void setVmop(uint32_t mop) { vMop_ = mop; vattr_mask_ |= vattr_vmop; }
|
||||
void setVumop(uint32_t umop) { vUmop_ = umop; vattr_mask_ |= vattr_vumop; }
|
||||
void setVnf(uint32_t nf) { vNf_ = nf; vattr_mask_ |= vattr_vnf; }
|
||||
void setVmask(uint32_t mask) { vmask_ = mask; vattr_mask_ |= vattr_vmask; }
|
||||
void setVs3(uint32_t vs) { vs3_ = vs; vattr_mask_ |= vattr_vs3; }
|
||||
void setZimm(bool has_zimm) { has_zimm_ = has_zimm; vattr_mask_ |= vattr_zimm; }
|
||||
void setVlmul(uint32_t lmul) { vlmul_ = lmul; vattr_mask_ |= vattr_vlmul; }
|
||||
void setVsew(uint32_t sew) { vsew_ = sew; vattr_mask_ |= vattr_vsew; }
|
||||
void setVta(uint32_t vta) { vta_ = vta; vattr_mask_ |= vattr_vta; }
|
||||
void setVma(uint32_t vma) { vma_ = vma; vattr_mask_ |= vattr_vma; }
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; vattr_mask_ |= vattr_vediv; }
|
||||
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
|
||||
uint32_t getRDest() const { return rdest_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
|
||||
bool hasImm() const { return has_imm_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
|
||||
uint32_t getFunc2() const { return func2_; }
|
||||
uint32_t getFunc3() const { return func3_; }
|
||||
uint32_t getFunc6() const { return func6_; }
|
||||
uint32_t getFunc7() const { return func7_; }
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
uint32_t getRDest() const { return rdest_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
|
||||
uint32_t getVlsWidth() const { return vlsWidth_; }
|
||||
uint32_t getVmop() const { return vMop_; }
|
||||
uint32_t getVumop() const { return vUmop_; }
|
||||
|
@ -172,8 +231,7 @@ public:
|
|||
uint32_t getVta() const { return vta_; }
|
||||
uint32_t getVma() const { return vma_; }
|
||||
uint32_t getVediv() const { return vediv_; }
|
||||
uint32_t getVUseMask() const { return _vusemask; }
|
||||
bool isVec() const { return _is_vec; }
|
||||
uint32_t getVattrMask() const { return vattr_mask_; }
|
||||
|
||||
private:
|
||||
|
||||
|
@ -187,7 +245,7 @@ private:
|
|||
RegType rdest_type_;
|
||||
uint32_t imm_;
|
||||
RegType rsrc_type_[MAX_REG_SOURCES];
|
||||
uint32_t rsrc_[MAX_REG_SOURCES];
|
||||
uint32_t rsrc_[MAX_REG_SOURCES];
|
||||
uint32_t rdest_;
|
||||
uint32_t func2_;
|
||||
uint32_t func3_;
|
||||
|
@ -207,8 +265,7 @@ private:
|
|||
uint32_t vta_;
|
||||
uint32_t vma_;
|
||||
uint32_t vediv_;
|
||||
uint32_t _vusemask;
|
||||
bool _is_vec;
|
||||
uint32_t vattr_mask_;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Instr&);
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue