vector ISA updates

This commit is contained in:
tinebp 2024-12-05 14:43:51 -08:00
parent 5d91fe58ad
commit 6b23d290c3
13 changed files with 858 additions and 859 deletions

View file

@ -394,7 +394,7 @@ vector()
{
echo "begin vector tests..."
make -C sim/simx
make -C sim/simx clean && CONFIGS="-DEXT_V_ENABLE" make -C sim/simx
TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh
echo "vector tests done!"

View file

@ -830,6 +830,12 @@
`define EXT_M_ENABLED 0
`endif
`ifdef EXT_V_ENABLE
`define EXT_V_ENABLED 1
`else
`define EXT_V_ENABLED 0
`endif
`ifdef EXT_ZICOND_ENABLE
`define EXT_ZICOND_ENABLED 1
`else
@ -846,7 +852,7 @@
`define ISA_STD_N 13
`define ISA_STD_Q 16
`define ISA_STD_S 18
`define ISA_STD_U 20
`define ISA_STD_V 21
`define ISA_EXT_ICACHE 0
`define ISA_EXT_DCACHE 1
@ -883,7 +889,7 @@
| (0 << 18) /* S - Supervisor mode implemented */ \
| (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
| (1 << 20) /* U - User mode implemented */ \
| (0 << 21) /* V - Tentatively reserved for Vector extension */ \
| (`EXT_V_ENABLED << 21) /* V - Tentatively reserved for Vector extension */ \
| (0 << 22) /* W - Reserved */ \
| (1 << 23) /* X - Non-standard extensions present */ \
| (0 << 24) /* Y - Reserved */ \

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -12,11 +12,11 @@
// limitations under the License.
#include "rvfloats.h"
#include "softfloat_ext.h"
#include <stdio.h>
extern "C" {
#include <softfloat.h>
#include "softfloat_ext.h"
#include <internals.h>
#include <../RISCV/specialize.h>
}
@ -344,7 +344,7 @@ bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
rv_init(0);
auto r = f32_eq(to_float32_t(a), to_float32_t(b));
if (fflags) { *fflags = softfloat_exceptionFlags; }
if (fflags) { *fflags = softfloat_exceptionFlags; }
return r;
}
@ -355,11 +355,11 @@ bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
return r;
}
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
uint32_t r;
rv_init(0);
if (isNaNF32UI(a) && isNaNF32UI(b)) {
r = defaultNaNF32UI;
r = defaultNaNF32UI;
} else {
auto fa = to_float32_t(a);
auto fb = to_float32_t(b);
@ -374,11 +374,11 @@ uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
return r;
}
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
uint64_t r;
rv_init(0);
if (isNaNF64UI(a) && isNaNF64UI(b)) {
r = defaultNaNF64UI;
r = defaultNaNF64UI;
} else {
auto fa = to_float64_t(a);
auto fb = to_float64_t(b);
@ -397,7 +397,7 @@ uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
uint32_t r;
rv_init(0);
if (isNaNF32UI(a) && isNaNF32UI(b)) {
r = defaultNaNF32UI;
r = defaultNaNF32UI;
} else {
auto fa = to_float32_t(a);
auto fb = to_float32_t(b);
@ -416,7 +416,7 @@ uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
uint64_t r;
rv_init(0);
if (isNaNF64UI(a) && isNaNF64UI(b)) {
r = defaultNaNF64UI;
r = defaultNaNF64UI;
} else {
auto fa = to_float64_t(a);
auto fb = to_float64_t(b);
@ -449,8 +449,8 @@ uint32_t rv_fclss_s(uint32_t a) {
( !sign && subnormOrZero && !fracZero ) << 5 |
( !sign && subnormOrZero && fracZero ) << 4 |
( isNaN && isSNaN ) << 8 |
( isNaN && !isSNaN ) << 9;
( isNaN && !isSNaN ) << 9;
return r;
}
@ -472,8 +472,8 @@ uint32_t rv_fclss_d(uint64_t a) {
( !sign && subnormOrZero && !fracZero ) << 5 |
( !sign && subnormOrZero && fracZero ) << 4 |
( isNaN && isSNaN ) << 8 |
( isNaN && !isSNaN ) << 9;
( isNaN && !isSNaN ) << 9;
return r;
}
@ -483,7 +483,7 @@ uint32_t rv_fsgnj_s(uint32_t a, uint32_t b) {
return r;
}
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
auto sign = b & F64_SIGN;
auto r = sign | (a & ~F64_SIGN);
return r;
@ -495,7 +495,7 @@ uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b) {
return r;
}
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
auto sign = ~b & F64_SIGN;
auto r = sign | (a & ~F64_SIGN);
return r;
@ -508,7 +508,7 @@ uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b) {
return r;
}
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
auto sign1 = a & F64_SIGN;
auto sign2 = b & F64_SIGN;
auto r = (sign1 ^ sign2) | (a & ~F64_SIGN);

View file

@ -33,110 +33,103 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <assert.h>
#include <stdbool.h>
#include <internals.h>
#include <../RISCV/specialize.h>
#include <softfloat.h>
#include "softfloat_ext.h"
#include <../RISCV/specialize.h>
#include <assert.h>
#include <internals.h>
#include <softfloat.h>
#include <stdbool.h>
uint_fast16_t f16_classify( float16_t a )
{
union ui16_f16 uA;
uint_fast16_t uiA;
#ifdef __cplusplus
extern "C" {
#endif
uA.f = a;
uiA = uA.ui;
uint_fast16_t f16_classify(float16_t a) {
union ui16_f16 uA;
uint_fast16_t uiA;
uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F;
uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0;
bool sign = signF16UI( uiA );
bool fracZero = fracF16UI( uiA ) == 0;
bool isNaN = isNaNF16UI( uiA );
bool isSNaN = softfloat_isSigNaNF16UI( uiA );
uA.f = a;
uiA = uA.ui;
return
( sign && infOrNaN && fracZero ) << 0 |
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
( sign && subnormalOrZero && !fracZero ) << 2 |
( sign && subnormalOrZero && fracZero ) << 3 |
( !sign && infOrNaN && fracZero ) << 7 |
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
( !sign && subnormalOrZero && !fracZero ) << 5 |
( !sign && subnormalOrZero && fracZero ) << 4 |
( isNaN && isSNaN ) << 8 |
( isNaN && !isSNaN ) << 9;
uint_fast16_t infOrNaN = expF16UI(uiA) == 0x1F;
uint_fast16_t subnormalOrZero = expF16UI(uiA) == 0;
bool sign = signF16UI(uiA);
bool fracZero = fracF16UI(uiA) == 0;
bool isNaN = isNaNF16UI(uiA);
bool isSNaN = softfloat_isSigNaNF16UI(uiA);
return (sign && infOrNaN && fracZero) << 0 |
(sign && !infOrNaN && !subnormalOrZero) << 1 |
(sign && subnormalOrZero && !fracZero) << 2 |
(sign && subnormalOrZero && fracZero) << 3 |
(!sign && infOrNaN && fracZero) << 7 |
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
(!sign && subnormalOrZero && !fracZero) << 5 |
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
(isNaN && !isSNaN) << 9;
}
uint_fast16_t f32_classify( float32_t a )
{
union ui32_f32 uA;
uint_fast32_t uiA;
uint_fast16_t f32_classify(float32_t a) {
union ui32_f32 uA;
uint_fast32_t uiA;
uA.f = a;
uiA = uA.ui;
uA.f = a;
uiA = uA.ui;
uint_fast16_t infOrNaN = expF32UI( uiA ) == 0xFF;
uint_fast16_t subnormalOrZero = expF32UI( uiA ) == 0;
bool sign = signF32UI( uiA );
bool fracZero = fracF32UI( uiA ) == 0;
bool isNaN = isNaNF32UI( uiA );
bool isSNaN = softfloat_isSigNaNF32UI( uiA );
uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;
uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0;
bool sign = signF32UI(uiA);
bool fracZero = fracF32UI(uiA) == 0;
bool isNaN = isNaNF32UI(uiA);
bool isSNaN = softfloat_isSigNaNF32UI(uiA);
return
( sign && infOrNaN && fracZero ) << 0 |
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
( sign && subnormalOrZero && !fracZero ) << 2 |
( sign && subnormalOrZero && fracZero ) << 3 |
( !sign && infOrNaN && fracZero ) << 7 |
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
( !sign && subnormalOrZero && !fracZero ) << 5 |
( !sign && subnormalOrZero && fracZero ) << 4 |
( isNaN && isSNaN ) << 8 |
( isNaN && !isSNaN ) << 9;
return (sign && infOrNaN && fracZero) << 0 |
(sign && !infOrNaN && !subnormalOrZero) << 1 |
(sign && subnormalOrZero && !fracZero) << 2 |
(sign && subnormalOrZero && fracZero) << 3 |
(!sign && infOrNaN && fracZero) << 7 |
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
(!sign && subnormalOrZero && !fracZero) << 5 |
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
(isNaN && !isSNaN) << 9;
}
uint_fast16_t f64_classify( float64_t a )
{
union ui64_f64 uA;
uint_fast64_t uiA;
uint_fast16_t f64_classify(float64_t a) {
union ui64_f64 uA;
uint_fast64_t uiA;
uA.f = a;
uiA = uA.ui;
uA.f = a;
uiA = uA.ui;
uint_fast16_t infOrNaN = expF64UI( uiA ) == 0x7FF;
uint_fast16_t subnormalOrZero = expF64UI( uiA ) == 0;
bool sign = signF64UI( uiA );
bool fracZero = fracF64UI( uiA ) == 0;
bool isNaN = isNaNF64UI( uiA );
bool isSNaN = softfloat_isSigNaNF64UI( uiA );
uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;
uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0;
bool sign = signF64UI(uiA);
bool fracZero = fracF64UI(uiA) == 0;
bool isNaN = isNaNF64UI(uiA);
bool isSNaN = softfloat_isSigNaNF64UI(uiA);
return
( sign && infOrNaN && fracZero ) << 0 |
( sign && !infOrNaN && !subnormalOrZero ) << 1 |
( sign && subnormalOrZero && !fracZero ) << 2 |
( sign && subnormalOrZero && fracZero ) << 3 |
( !sign && infOrNaN && fracZero ) << 7 |
( !sign && !infOrNaN && !subnormalOrZero ) << 6 |
( !sign && subnormalOrZero && !fracZero ) << 5 |
( !sign && subnormalOrZero && fracZero ) << 4 |
( isNaN && isSNaN ) << 8 |
( isNaN && !isSNaN ) << 9;
return (sign && infOrNaN && fracZero) << 0 |
(sign && !infOrNaN && !subnormalOrZero) << 1 |
(sign && subnormalOrZero && !fracZero) << 2 |
(sign && subnormalOrZero && fracZero) << 3 |
(!sign && infOrNaN && fracZero) << 7 |
(!sign && !infOrNaN && !subnormalOrZero) << 6 |
(!sign && subnormalOrZero && !fracZero) << 5 |
(!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 |
(isNaN && !isSNaN) << 9;
}
static inline uint64_t extract64(uint64_t val, int pos, int len)
{
// Returns the `len`-bit wide field of `val` whose least-significant bit sits
// at position `pos` (bit 0 is the LSB). The field is right-aligned in the
// result; all bits above it are zero.
static inline uint64_t extract64(uint64_t val, int pos, int len) {
  assert(pos >= 0 && len > 0 && len <= 64 - pos);
  const uint64_t field_mask = ~UINT64_C(0) >> (64 - len); // `len` low ones
  const uint64_t aligned = val >> pos;                    // field moved to bit 0
  return aligned & field_mask;
}
static inline uint64_t make_mask64(int pos, int len)
{
assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
return (UINT64_MAX >> (64 - len)) << pos;
// Builds a 64-bit mask containing `len` consecutive one bits, the lowest of
// which is at bit position `pos`. Ones shifted past bit 63 are discarded.
static inline uint64_t make_mask64(int pos, int len) {
  assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
  const uint64_t low_ones = UINT64_MAX >> (64 - len); // `len` ones at bit 0
  return low_ones << pos;
}
//user needs to truncate output to required length
// user needs to truncate output to required length
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
uint64_t exp = extract64(val, s, e);
uint64_t sig = extract64(val, 0, s);
@ -144,343 +137,320 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
const int p = 7;
static const uint8_t table[] = {
52, 51, 50, 48, 47, 46, 44, 43,
42, 41, 40, 39, 38, 36, 35, 34,
33, 32, 31, 30, 30, 29, 28, 27,
26, 25, 24, 23, 23, 22, 21, 20,
19, 19, 18, 17, 16, 16, 15, 14,
14, 13, 12, 12, 11, 10, 10, 9,
9, 8, 7, 7, 6, 6, 5, 4,
4, 3, 3, 2, 2, 1, 1, 0,
127, 125, 123, 121, 119, 118, 116, 114,
113, 111, 109, 108, 106, 105, 103, 102,
100, 99, 97, 96, 95, 93, 92, 91,
90, 88, 87, 86, 85, 84, 83, 82,
80, 79, 78, 77, 76, 75, 74, 73,
72, 71, 70, 70, 69, 68, 67, 66,
65, 64, 63, 63, 62, 61, 60, 59,
59, 58, 57, 56, 56, 55, 54, 53};
52, 51, 50, 48, 47, 46, 44, 43, 42, 41, 40, 39, 38, 36, 35,
34, 33, 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22,
21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11,
10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 4, 4, 3, 3, 2,
2, 1, 1, 0, 127, 125, 123, 121, 119, 118, 116, 114, 113, 111, 109,
108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88,
87, 86, 85, 84, 83, 82, 80, 79, 78, 77, 76, 75, 74, 73, 72,
71, 70, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 59,
59, 58, 57, 56, 56, 55, 54, 53};
if (sub) {
while (extract64(sig, s - 1, 1) == 0)
exp--, sig <<= 1;
while (extract64(sig, s - 1, 1) == 0)
exp--, sig <<= 1;
sig = (sig << 1) & make_mask64(0 ,s);
sig = (sig << 1) & make_mask64(0, s);
}
int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1));
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
int idx = ((exp & 1) << (p - 1)) | (sig >> (s - p + 1));
uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;
return (sign << (s+e)) | (out_exp << s) | out_sig;
return (sign << (s + e)) | (out_exp << s) | out_sig;
}
// Reciprocal square-root estimate for a half-precision input.
//
// The input is classified with f16_classify() and handled per class:
//   - -inf, negative normal, negative subnormal, signaling NaN:
//       raise softfloat_flag_invalid and return the default NaN.
//   - quiet NaN: return the default NaN without raising a flag.
//   - -0 / +0: return -inf (0xfc00) / +inf (0x7c00) and raise
//       softfloat_flag_infinite.
//   - +inf: return +0.
//   - positive subnormal / normal: table-based estimate via rsqrte7() with
//       5 exponent bits and 10 significand bits; subnormals are flagged so
//       rsqrte7() normalizes them first.
float16_t f16_rsqrte7(float16_t in) {
  union ui16_f16 uA;

  uA.f = in;
  unsigned int ret = f16_classify(in);
  bool sub = false;
  switch (ret) {
  case 0x001: // -inf
  case 0x002: // -normal
  case 0x004: // -subnormal
  case 0x100: // sNaN
    softfloat_exceptionFlags |= softfloat_flag_invalid;
    [[fallthrough]];
  case 0x200: // qNaN
    uA.ui = defaultNaNF16UI;
    break;
  case 0x008: // -0
    uA.ui = 0xfc00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x010: // +0
    uA.ui = 0x7c00;
    softfloat_exceptionFlags |= softfloat_flag_infinite;
    break;
  case 0x080: // +inf
    uA.ui = 0x0;
    break;
  case 0x020: // +subnormal
    sub = true;
    [[fallthrough]];
  default: // +normal
    uA.ui = rsqrte7(uA.ui, 5, 10, sub);
    break;
  }

  return uA.f;
}
float32_t f32_rsqrte7(float32_t in)
{
union ui32_f32 uA;
float32_t f32_rsqrte7(float32_t in) {
union ui32_f32 uA;
uA.f = in;
unsigned int ret = f32_classify(in);
bool sub = false;
switch(ret) {
case 0x001: // -inf
case 0x002: // -normal
case 0x004: // -subnormal
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: //qNaN
uA.ui = defaultNaNF32UI;
break;
case 0x008: // -0
uA.ui = 0xff800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7f800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +num
uA.ui = rsqrte7(uA.ui, 8, 23, sub);
break;
}
uA.f = in;
unsigned int ret = f32_classify(in);
bool sub = false;
switch (ret) {
case 0x001: // -inf
case 0x002: // -normal
case 0x004: // -subnormal
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: // qNaN
uA.ui = defaultNaNF32UI;
break;
case 0x008: // -0
uA.ui = 0xff800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7f800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +num
uA.ui = rsqrte7(uA.ui, 8, 23, sub);
break;
}
return uA.f;
return uA.f;
}
float64_t f64_rsqrte7(float64_t in)
{
union ui64_f64 uA;
float64_t f64_rsqrte7(float64_t in) {
union ui64_f64 uA;
uA.f = in;
unsigned int ret = f64_classify(in);
bool sub = false;
switch(ret) {
case 0x001: // -inf
case 0x002: // -normal
case 0x004: // -subnormal
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: //qNaN
uA.ui = defaultNaNF64UI;
break;
case 0x008: // -0
uA.ui = 0xfff0000000000000ul;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7ff0000000000000ul;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +num
uA.ui = rsqrte7(uA.ui, 11, 52, sub);
break;
}
uA.f = in;
unsigned int ret = f64_classify(in);
bool sub = false;
switch (ret) {
case 0x001: // -inf
case 0x002: // -normal
case 0x004: // -subnormal
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: // qNaN
uA.ui = defaultNaNF64UI;
break;
case 0x008: // -0
uA.ui = 0xfff0000000000000ul;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7ff0000000000000ul;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +num
uA.ui = rsqrte7(uA.ui, 11, 52, sub);
break;
}
return uA.f;
return uA.f;
}
//user needs to truncate output to required length
// user needs to truncate output to required length
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
bool *round_abnormal)
{
uint64_t exp = extract64(val, s, e);
uint64_t sig = extract64(val, 0, s);
uint64_t sign = extract64(val, s + e, 1);
const int p = 7;
bool *round_abnormal) {
uint64_t exp = extract64(val, s, e);
uint64_t sig = extract64(val, 0, s);
uint64_t sign = extract64(val, s + e, 1);
const int p = 7;
static const uint8_t table[] = {
127, 125, 123, 121, 119, 117, 116, 114,
112, 110, 109, 107, 105, 104, 102, 100,
99, 97, 96, 94, 93, 91, 90, 88,
87, 85, 84, 83, 81, 80, 79, 77,
76, 75, 74, 72, 71, 70, 69, 68,
66, 65, 64, 63, 62, 61, 60, 59,
58, 57, 56, 55, 54, 53, 52, 51,
50, 49, 48, 47, 46, 45, 44, 43,
42, 41, 40, 40, 39, 38, 37, 36,
35, 35, 34, 33, 32, 31, 31, 30,
29, 28, 28, 27, 26, 25, 25, 24,
23, 23, 22, 21, 21, 20, 19, 19,
18, 17, 17, 16, 15, 15, 14, 14,
13, 12, 12, 11, 11, 10, 9, 9,
8, 8, 7, 7, 6, 5, 5, 4,
4, 3, 3, 2, 2, 1, 1, 0};
static const uint8_t table[] = {
127, 125, 123, 121, 119, 117, 116, 114, 112, 110, 109, 107, 105, 104, 102,
100, 99, 97, 96, 94, 93, 91, 90, 88, 87, 85, 84, 83, 81, 80,
79, 77, 76, 75, 74, 72, 71, 70, 69, 68, 66, 65, 64, 63, 62,
61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47,
46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34,
33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23,
22, 21, 21, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13,
12, 12, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 5, 5, 4,
4, 3, 3, 2, 2, 1, 1, 0};
if (sub) {
while (extract64(sig, s - 1, 1) == 0)
exp--, sig <<= 1;
if (sub) {
while (extract64(sig, s - 1, 1) == 0)
exp--, sig <<= 1;
sig = (sig << 1) & make_mask64(0 ,s);
sig = (sig << 1) & make_mask64(0, s);
if (exp != 0 && exp != UINT64_MAX) {
*round_abnormal = true;
if (rm == 1 ||
(rm == 2 && !sign) ||
(rm == 3 && sign))
return ((sign << (s+e)) | make_mask64(s, e)) - 1;
else
return (sign << (s+e)) | make_mask64(s, e);
}
if (exp != 0 && exp != UINT64_MAX) {
*round_abnormal = true;
if (rm == 1 || (rm == 2 && !sign) || (rm == 3 && sign))
return ((sign << (s + e)) | make_mask64(s, e)) - 1;
else
return (sign << (s + e)) | make_mask64(s, e);
}
}
int idx = sig >> (s-p);
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
if (out_exp == 0 || out_exp == UINT64_MAX) {
out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
if (out_exp == UINT64_MAX) {
out_sig >>= 1;
out_exp = 0;
}
int idx = sig >> (s - p);
uint64_t out_sig = (uint64_t)(table[idx]) << (s - p);
uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
if (out_exp == 0 || out_exp == UINT64_MAX) {
out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
if (out_exp == UINT64_MAX) {
out_sig >>= 1;
out_exp = 0;
}
}
return (sign << (s+e)) | (out_exp << s) | out_sig;
return (sign << (s + e)) | (out_exp << s) | out_sig;
}
float16_t f16_recip7(float16_t in)
{
union ui16_f16 uA;
float16_t f16_recip7(float16_t in) {
union ui16_f16 uA;
uA.f = in;
unsigned int ret = f16_classify(in);
bool sub = false;
bool round_abnormal = false;
switch(ret) {
case 0x001: // -inf
uA.ui = 0x8000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xfc00;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7c00;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: //qNaN
uA.ui = defaultNaNF16UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 5, 10,
softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |= softfloat_flag_inexact |
softfloat_flag_overflow;
break;
}
uA.f = in;
unsigned int ret = f16_classify(in);
bool sub = false;
bool round_abnormal = false;
switch (ret) {
case 0x001: // -inf
uA.ui = 0x8000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xfc00;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7c00;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: // qNaN
uA.ui = defaultNaNF16UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
break;
}
return uA.f;
return uA.f;
}
float32_t f32_recip7(float32_t in)
{
union ui32_f32 uA;
float32_t f32_recip7(float32_t in) {
union ui32_f32 uA;
uA.f = in;
unsigned int ret = f32_classify(in);
bool sub = false;
bool round_abnormal = false;
switch(ret) {
case 0x001: // -inf
uA.ui = 0x80000000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xff800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7f800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: //qNaN
uA.ui = defaultNaNF32UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 8, 23,
softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |= softfloat_flag_inexact |
softfloat_flag_overflow;
break;
}
uA.f = in;
unsigned int ret = f32_classify(in);
bool sub = false;
bool round_abnormal = false;
switch (ret) {
case 0x001: // -inf
uA.ui = 0x80000000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xff800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7f800000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: // qNaN
uA.ui = defaultNaNF32UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
break;
}
return uA.f;
return uA.f;
}
float64_t f64_recip7(float64_t in)
{
union ui64_f64 uA;
float64_t f64_recip7(float64_t in) {
union ui64_f64 uA;
uA.f = in;
unsigned int ret = f64_classify(in);
bool sub = false;
bool round_abnormal = false;
switch(ret) {
case 0x001: // -inf
uA.ui = 0x8000000000000000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xfff0000000000000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7ff0000000000000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: //qNaN
uA.ui = defaultNaNF64UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 11, 52,
softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |= softfloat_flag_inexact |
softfloat_flag_overflow;
break;
}
uA.f = in;
unsigned int ret = f64_classify(in);
bool sub = false;
bool round_abnormal = false;
switch (ret) {
case 0x001: // -inf
uA.ui = 0x8000000000000000;
break;
case 0x080: //+inf
uA.ui = 0x0;
break;
case 0x008: // -0
uA.ui = 0xfff0000000000000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x010: // +0
uA.ui = 0x7ff0000000000000;
softfloat_exceptionFlags |= softfloat_flag_infinite;
break;
case 0x100: // sNaN
softfloat_exceptionFlags |= softfloat_flag_invalid;
[[fallthrough]];
case 0x200: // qNaN
uA.ui = defaultNaNF64UI;
break;
case 0x004: // -subnormal
case 0x020: //+ sub
sub = true;
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
break;
}
return uA.f;
}
return uA.f;
}
#ifdef __cplusplus
}
#endif

View file

@ -1,14 +1,22 @@
#include <stdint.h>
#include <softfloat_types.h>
uint_fast16_t f16_classify( float16_t );
float16_t f16_rsqrte7( float16_t );
float16_t f16_recip7( float16_t );
#ifdef __cplusplus
extern "C" {
#endif
uint_fast16_t f32_classify( float32_t );
float32_t f32_rsqrte7( float32_t );
float32_t f32_recip7( float32_t );
// Half-precision helpers.
// f16_classify: returns a class mask for the value; the implementation sets
// one bit per class: 0 -inf, 1 -normal, 2 -subnormal, 3 -0, 4 +0,
// 5 +subnormal, 6 +normal, 7 +inf, 8 signaling NaN, 9 quiet NaN.
uint_fast16_t f16_classify(float16_t);
// f16_rsqrte7: reciprocal square-root estimate (table-driven in the
// implementation); may update softfloat_exceptionFlags.
float16_t f16_rsqrte7(float16_t);
// f16_recip7: reciprocal estimate (table-driven in the implementation);
// may update softfloat_exceptionFlags.
float16_t f16_recip7(float16_t);
uint_fast16_t f64_classify( float64_t );
float64_t f64_rsqrte7( float64_t );
float64_t f64_recip7( float64_t );
// Single-precision helpers.
// f32_classify: returns a class mask for the value; the implementation sets
// one bit per class: 0 -inf, 1 -normal, 2 -subnormal, 3 -0, 4 +0,
// 5 +subnormal, 6 +normal, 7 +inf, 8 signaling NaN, 9 quiet NaN.
uint_fast16_t f32_classify(float32_t);
// f32_rsqrte7: reciprocal square-root estimate (table-driven in the
// implementation); may update softfloat_exceptionFlags.
float32_t f32_rsqrte7(float32_t);
// f32_recip7: reciprocal estimate (table-driven in the implementation);
// may update softfloat_exceptionFlags.
float32_t f32_recip7(float32_t);
// Double-precision helpers; same contracts as the single-precision ones
// above, with the same class-mask bit layout for f64_classify.
uint_fast16_t f64_classify(float64_t);
float64_t f64_rsqrte7(float64_t);
float64_t f64_recip7(float64_t);
#ifdef __cplusplus
}
#endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,10 +16,10 @@
// return file extension
const char* fileExtension(const char* filepath) {
const char *ext = strrchr(filepath, '.');
if (ext == NULL || ext == filepath)
return "";
return ext + 1;
const char *ext = strrchr(filepath, '.');
if (ext == NULL || ext == filepath)
return "";
return ext + 1;
}
void* aligned_malloc(size_t size, size_t alignment) {

View file

@ -18,7 +18,12 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
# Sources taken from $(COMMON_DIR); each file is listed exactly once so it is
# never compiled or linked twice.
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/execute_vector.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
# Add V extension sources
ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
SRCS += $(SRC_DIR)/execute_v.cpp
endif
# Debugging
ifdef DEBUG

View file

@ -390,7 +390,7 @@ static const char* op_string(const Instr &instr) {
default:
std::abort();
}
case Opcode::TCU:
switch(func3)
{
@ -405,36 +405,31 @@ static const char* op_string(const Instr &instr) {
}
}
inline void vec_log(std::ostream &os, const Instr &instr) {
if (instr.getVUseMask() & set_func3)
os << ", func3:" << instr.getFunc3();
if (instr.getVUseMask() & set_func6)
os << ", func6:" << instr.getFunc6();
if (instr.getVUseMask() & set_imm)
os << ", imm:" << instr.getImm();
if (instr.getVUseMask() & set_vlswidth)
inline void print_vec_attr(std::ostream &os, const Instr &instr) {
uint32_t mask = instr.getVattrMask();
if (mask & vattr_vlswidth)
os << ", width:" << instr.getVlsWidth();
if (instr.getVUseMask() & set_vmop)
if (mask & vattr_vmop)
os << ", mop:" << instr.getVmop();
if (instr.getVUseMask() & set_vumop)
if (mask & vattr_vumop)
os << ", umop:" << instr.getVumop();
if (instr.getVUseMask() & set_vnf)
if (mask & vattr_vnf)
os << ", nf:" << instr.getVnf();
if (instr.getVUseMask() & set_vmask)
if (mask & vattr_vmask)
os << ", vmask:" << instr.getVmask();
if (instr.getVUseMask() & set_vs3)
if (mask & vattr_vs3)
os << ", vs3:" << instr.getVs3();
if (instr.getVUseMask() & set_zimm)
if (mask & vattr_zimm)
os << ", zimm:" << ((instr.hasZimm()) ? "true" : "false");
if (instr.getVUseMask() & set_vlmul)
if (mask & vattr_vlmul)
os << ", lmul:" << instr.getVlmul();
if (instr.getVUseMask() & set_vsew)
if (mask & vattr_vsew)
os << ", sew:" << instr.getVsew();
if (instr.getVUseMask() & set_vta)
if (mask & vattr_vta)
os << ", ta:" << instr.getVta();
if (instr.getVUseMask() & set_vma)
if (mask & vattr_vma)
os << ", ma:" << instr.getVma();
if (instr.getVUseMask() & set_vediv)
if (mask & vattr_vediv)
os << ", ediv:" << instr.getVediv();
}
@ -463,8 +458,10 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
if (sep++ != 0) { os << ", "; } else { os << " "; }
os << "0x" << std::hex << instr.getRSrc(0);
}
// Log vector-specific vtype and vreg info
if (instr.isVec()) vec_log(os, instr);
// Log vector-specific attributes
if (instr.getVattrMask() != 0) {
print_vec_attr(os, instr);
}
return os;
}
}
@ -478,6 +475,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
auto func3 = (code >> shift_func3) & mask_func3;
auto func6 = (code >> shift_func6) & mask_func6;
auto func7 = (code >> shift_func7) & mask_func7;
__unused(func6);
auto rd = (code >> shift_rd) & mask_reg;
auto rs1 = (code >> shift_rs1) & mask_reg;
@ -690,9 +688,18 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
instr->setImm(sext(imm, width_j_imm+1));
} break;
case InstType::R4: {
instr->setDestReg(rd, RegType::Float);
instr->addSrcReg(rs1, RegType::Float);
instr->addSrcReg(rs2, RegType::Float);
instr->addSrcReg(rs3, RegType::Float);
instr->setFunc2(func2);
instr->setFunc3(func3);
} break;
#ifdef EXT_V_ENABLE
case InstType::V:
instr->setVec(true);
switch (op) {
case Opcode::VSET: {
instr->setDestReg(rd, RegType::Integer);
@ -738,7 +745,6 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
}
}
} break;
case Opcode::FL:
instr->addSrcReg(rs1, RegType::Integer);
instr->setVmop((code >> shift_vmop) & 0b11);
@ -788,14 +794,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
std::abort();
}
break;
case InstType::R4:
instr->setDestReg(rd, RegType::Float);
instr->addSrcReg(rs1, RegType::Float);
instr->addSrcReg(rs2, RegType::Float);
instr->addSrcReg(rs3, RegType::Float);
instr->setFunc2(func2);
instr->setFunc3(func3);
break;
#endif
default:
std::abort();

View file

@ -43,7 +43,9 @@ void Emulator::warp_t::clear(uint64_t startup_addr) {
this->uuid = 0;
this->fcsr = 0;
std::srand(50);
this->vtype = {0, 0, 0, 0, 0};
this->vl = 0;
this->VLMAX = 0;
for (auto& reg_file : this->ireg_file) {
for (auto& reg : reg_file) {
@ -102,6 +104,8 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
, scratchpad(std::vector<Word>(32 * 32 * 32768))
, csrs_(arch.num_warps())
{
std::srand(50);
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
csrs_.at(i).resize(arch.num_threads());
}
@ -142,8 +146,7 @@ void Emulator::clear() {
warps_[0].tmask.set(0);
wspawn_.valid = false;
for (auto& reg : scratchpad)
{
for (auto& reg : scratchpad) {
reg = 0;
}
}
@ -190,6 +193,7 @@ instr_trace_t* Emulator::step() {
assert(warp.tmask.any());
#ifndef NDEBUG
// generate unique universal instruction ID
uint32_t instr_uuid = warp.uuid++;
uint32_t g_wid = core_->id() * arch_.num_warps() + scheduled_warp;
uint64_t uuid = (uint64_t(g_wid) << 32) | instr_uuid;
@ -305,27 +309,26 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
#ifdef VM_ENABLE
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size);
try
try
{
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH);
}
catch (Page_Fault_Exception& page_fault)
catch (Page_Fault_Exception& page_fault)
{
std::cout<<page_fault.what()<<std::endl;
throw;
}
}
}
#else
// Instruction fetch path used when VM_ENABLE is not defined: reads `size`
// bytes starting at `addr` into `data` through the MMU.
// The fourth argument is passed as 0 here; the VM_ENABLE variant of this
// function passes ACCESS_TYPE::FETCH in that position instead.
// The read is issued exactly once (a second identical call was redundant).
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
  mmu_.read(data, addr, size, 0);
}
#endif
#ifdef VM_ENABLE
// Stores `satp` into the VX_CSR_SATP CSR via set_csr(), using thread id 0
// and warp id 0, after emitting a level-3 debug trace.
// The CSR is written exactly once (a second identical set_csr call was
// redundant).
void Emulator::set_satp(uint64_t satp) {
  DPH(3, "set satp 0x" << std::hex << satp << " in emulator module\n");
  set_csr(VX_CSR_SATP, satp, 0, 0);
}
#endif
@ -337,11 +340,11 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
if (type == AddrType::Shared) {
core_->local_mem()->read(data, addr, size);
} else {
try
try
{
mmu_.read(data, addr, size, ACCESS_TYPE::LOAD);
}
catch (Page_Fault_Exception& page_fault)
catch (Page_Fault_Exception& page_fault)
{
std::cout<<page_fault.what()<<std::endl;
throw;
@ -373,16 +376,16 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
if (type == AddrType::Shared) {
core_->local_mem()->write(data, addr, size);
} else {
try
try
{
// mmu_.write(data, addr, size, 0);
mmu_.write(data, addr, size, ACCESS_TYPE::STORE);
}
catch (Page_Fault_Exception& page_fault)
catch (Page_Fault_Exception& page_fault)
{
std::cout<<page_fault.what()<<std::endl;
throw;
}
}
}
}
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
@ -450,18 +453,15 @@ void Emulator::cout_flush() {
case (addr + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return ((value >> 32) & 0xFFFFFFFF)
#endif
Word Emulator::get_tiles()
{
Word Emulator::get_tiles() {
return mat_size;
}
Word Emulator::get_tc_size()
{
Word Emulator::get_tc_size() {
return tc_size;
}
Word Emulator::get_tc_num()
{
Word Emulator::get_tc_num() {
return tc_num;
}
@ -680,7 +680,7 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
case VX_TC_SIZE:
tc_size = value;
break;
default: {
std::cout << "Error: invalid CSR write addr=0x" << std::hex << addr << ", value=0x" << value << std::dec << std::endl;
std::abort();
@ -688,8 +688,6 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
}
}
// Resolves the floating-point rounding mode for an instruction.
// Any func3 other than 0x7 is used directly as the rounding mode; 0x7 means
// the mode is taken from the VX_CSR_FRM CSR of thread `tid` in warp `wid`.
uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) {
  if (func3 != 0x7) {
    return func3;
  }
  return this->get_csr(VX_CSR_FRM, tid, wid);
}
@ -711,4 +709,4 @@ void Emulator::trigger_ecall() {
}
// Handles an ebreak by calling reset() on active_warps_.
// NOTE(review): active_warps_ is presumably a bitset of schedulable warps, so
// this would deactivate every warp — confirm against its declaration.
void Emulator::trigger_ebreak() {
active_warps_.reset();
}
}

View file

@ -28,76 +28,6 @@ class Core;
class Instr;
class instr_trace_t;
// Bit-field layout constants for a 32-bit instruction word, in three groups:
//   width_* : size of a field in bits
//   shift_* : bit position of a field's least-significant bit
//   mask_*  : (1 << width) - 1, applied after shifting
// The decoder extracts a field as `(code >> shift_x) & mask_x`.
enum Constants {
// --- field widths (bits) ---
width_opcode= 7,
width_reg = 5,
width_func2 = 2,
width_func3 = 3,
width_func6 = 6,
width_func7 = 7,
width_mop = 3,
width_vmask = 1,
width_i_imm = 12,
width_j_imm = 20,
width_v_zimm = 11,
width_v_ma = 1,
width_v_ta = 1,
width_v_sew = 3,
width_v_lmul = 3,
width_aq = 1,
width_rl = 1,
// --- field positions; each is derived from the previous fields ---
shift_opcode= 0,
shift_rd = width_opcode, // = 7
shift_func3 = shift_rd + width_reg, // = 12
shift_rs1 = shift_func3 + width_func3, // = 15
shift_rs2 = shift_rs1 + width_reg, // = 20
shift_func2 = shift_rs2 + width_reg, // = 25
shift_func7 = shift_rs2 + width_reg, // = 25 (same start bit as func2)
shift_rs3 = shift_func7 + width_func2, // = 27
shift_vmop = shift_func7 + width_vmask, // = 26 (skips the 1-bit vmask at bit 25)
shift_vnf = shift_vmop + width_mop, // = 29
shift_func6 = shift_func7 + width_vmask, // = 26
shift_vset = shift_func7 + width_func6, // = 31
// The next three shifts are relative to bit 0 of a separate value, not the
// instruction word — presumably the vtype immediate; confirm at the use site.
shift_v_sew = width_v_lmul, // = 3
shift_v_ta = shift_v_sew + width_v_sew, // = 6
shift_v_ma = shift_v_ta + width_v_ta, // = 7
// --- masks: low `width` bits set ---
mask_opcode = (1 << width_opcode) - 1,
mask_reg = (1 << width_reg) - 1,
mask_func2 = (1 << width_func2) - 1,
mask_func3 = (1 << width_func3) - 1,
mask_func6 = (1 << width_func6) - 1,
mask_func7 = (1 << width_func7) - 1,
mask_i_imm = (1 << width_i_imm) - 1,
mask_j_imm = (1 << width_j_imm) - 1,
mask_v_zimm = (1 << width_v_zimm) - 1,
mask_v_ma = (1 << width_v_ma) - 1,
mask_v_ta = (1 << width_v_ta) - 1,
mask_v_sew = (1 << width_v_sew) - 1,
mask_v_lmul = (1 << width_v_lmul) - 1,
};
// Per-warp vector type state, stored as one 32-bit value per field.
// warp_t::clear() zero-initialises all five fields.
struct vtype {
uint32_t vill;
uint32_t vma;
uint32_t vta;
// The vector load/store code divides vsew by 8 to get a byte stride, so this
// appears to hold the element width in bits — TODO confirm where it is set.
uint32_t vsew;
uint32_t vlmul;
};
// One register operand/result value, viewable through several overlapping
// types. Being a union, all members share the same storage; callers pick the
// member matching how the instruction interprets the value (e.g. `.i` for
// address arithmetic, `.u64` for loaded data).
union reg_data_t {
Word u;
WordI i;
WordF f;
float f32;
double f64;
uint32_t u32;
uint64_t u64;
int32_t i32;
int64_t i64;
};
class Emulator {
public:
Emulator(const Arch &arch,
@ -126,11 +56,11 @@ public:
bool wspawn(uint32_t num_warps, Word nextPC);
int get_exitcode() const;
Word get_tiles();
Word get_tc_size();
Word get_tc_num();
void dcache_read(void* data, uint64_t addr, uint32_t size);
void dcache_write(const void* data, uint64_t addr, uint32_t size);
@ -151,6 +81,26 @@ private:
bool fallthrough;
};
// Per-warp vector type state, stored as one 32-bit value per field.
// warp_t holds one instance (`vtype`), which warp_t::clear() resets to zeros.
struct vtype_t {
uint32_t vill;
uint32_t vma;
uint32_t vta;
// NOTE(review): looks like the element width in bits — confirm where it is set.
uint32_t vsew;
uint32_t vlmul;
};
// One register operand/result value, viewable through several overlapping
// types. Being a union, all members share the same storage; the execute and
// vector helpers receive vectors of these (three source slots per thread in
// `rsdata`, one destination slot per thread in `rddata`).
union reg_data_t {
Word u;
WordI i;
WordF f;
float f32;
double f64;
uint32_t u32;
uint64_t u64;
int32_t i32;
int64_t i64;
};
struct warp_t {
warp_t(const Arch& arch);
void clear(uint64_t startup_addr);
@ -162,11 +112,10 @@ private:
std::vector<std::vector<Byte>> vreg_file;
std::stack<ipdom_entry_t> ipdom_stack;
Byte fcsr;
vtype_t vtype;
uint32_t vl;
Word VLMAX;
uint32_t uuid;
struct vtype vtype;
uint32_t vl;
Word VLMAX;
};
struct wspawn_t {
@ -179,11 +128,11 @@ private:
void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace);
void executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata);
#ifdef EXT_V_ENABLE
void loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
void storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
void executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata);
#endif
void icache_read(void* data, uint64_t addr, uint32_t size);
@ -203,9 +152,10 @@ private:
void update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid);
void trigger_ecall(); // Re-added for riscv-vector test functionality
void trigger_ebreak(); // Re-added for riscv-vector test functionality
// temporarily added for riscv-vector tests
// TODO: remove once ecall/ebreak are supported
void trigger_ecall();
void trigger_ebreak();
const Arch& arch_;
const DCRS& dcrs_;

View file

@ -677,7 +677,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
for (uint32_t t = thread_start; t < num_threads; ++t) {
if (!warp.tmask.test(t))
continue;
uint64_t mem_addr = rsdata[t][0].i + immsrc;
uint64_t mem_addr = rsdata[t][0].i + immsrc;
uint64_t read_data = 0;
this->dcache_read(&read_data, mem_addr, data_bytes);
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
@ -703,12 +703,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
rddata[t].u64 = read_data;
break;
default:
std::abort();
std::abort();
}
}
rd_write = true;
} else {
loadVector(instr, wid, rsdata);
#ifdef EXT_V_ENABLE
this->loadVector(instr, wid, rsdata);
#endif
}
break;
}
@ -736,14 +738,16 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
case 1:
case 2:
case 3:
this->dcache_write(&write_data, mem_addr, data_bytes);
this->dcache_write(&write_data, mem_addr, data_bytes);
break;
default:
std::abort();
}
}
} else {
storeVector(instr, wid, rsdata);
#ifdef EXT_V_ENABLE
this->storeVector(instr, wid, rsdata);
#endif
}
break;
}
@ -1595,6 +1599,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
std::abort();
}
} break;
#ifdef EXT_V_ENABLE
case Opcode::VSET: {
auto func6 = instr.getFunc6();
if ((func3 == 0x7) || (func3 == 0x2 && func6 == 16) || (func3 == 0x1 && func6 == 16)) {
@ -1602,6 +1607,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
}
executeVector(instr, wid, rsdata, rddata);
} break;
#endif
default:
std::abort();
}

View file

@ -1132,7 +1132,7 @@ bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint
auto& mask = vreg_file.at(maskVreg);
uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
uint8_t value = (emask >> (byteI % 8)) & 0x1;
DP(1, "Masking enabled: " << +!vmask << " mask element: " << +value);
DP(4, "Masking enabled: " << +!vmask << " mask element: " << +value);
return !vmask && value == 0;
}
@ -1164,14 +1164,14 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
}
for (uint32_t i = 0; i < vl * nfields; i++) {
if (isMasked(vreg_file, 0, i / nfields, vmask)) continue;
uint32_t nfields_strided = strided ? nfields : 1;
Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
Word mem_data = 0;
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
DP(1, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DP(4, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
DP(1, "Previous data: " << +result);
DP(4, "Previous data: " << +result);
result = (DT) mem_data;
}
}
@ -1225,13 +1225,13 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
std::cout << "Unsupported iSew: " << iSew << std::endl;
std::abort();
}
Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT);
Word mem_data = 0;
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
DP(1, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DP(4, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
DP(1, "Previous data: " << +result);
DP(4, "Previous data: " << +result);
result = (DT) mem_data;
}
}
@ -1256,104 +1256,6 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
}
}
// Executes a vector load instruction for warp `wid`.
//
// Dispatches on the instruction's addressing mode (`mop`) and, for unit-stride
// accesses, on the `lumop` sub-opcode, then forwards to vector_op_vix_load
// (unit-stride / strided) or vector_op_vv_load (indexed), which write the
// loaded elements into the warp's vector register file.
//
//   instr  : decoded instruction (supplies vmask, destination vreg, mop,
//            lumop, nf and the encoded element width)
//   wid    : index into warps_
//   rsdata : per-thread source operand values; the helpers use rsdata[0][0]
//            as the base address
//
// Any unsupported or reserved encoding prints a message and calls std::abort().
void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
auto &warp = warps_.at(wid);
auto vmask = instr.getVmask();
auto rdest = instr.getRDest();
auto mop = instr.getVmop();
switch (mop) {
case 0b00: { // unit-stride
auto lumop = instr.getVumop();
switch (lumop) {
case 0b10000: // vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride
// vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v
// vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v
// vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v
// vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v
// vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v
// vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v
// vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v
case 0b0000: { // vle8.v, vle16.v, vle32.v, vle64.v
// vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v
// vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v
// vlseg4e8.v, vlseg4e16.v, vlseg4e32.v, vlseg4e64.v
// vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v
// vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v
// vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v
// vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v
// Consecutive elements: the stride is one element, taken from the warp's
// current vsew (divided by 8 to get bytes).
WordI stride = warp.vtype.vsew / 8;
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v
// Here the nf field encodes the number of whole registers minus one.
uint32_t nreg = instr.getVnf() + 1;
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl;
std::abort();
}
DP(1, "Whole vector register load with nreg: " << nreg);
// Element count covers nreg full registers at the instruction's own element
// width (instr.getVsew()), independent of the warp's vl and vtype.
uint32_t vl = nreg * VLEN / instr.getVsew();
WordI stride = instr.getVsew() / 8;
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask);
break;
}
case 0b1011: { // vlm.v
// NOTE(review): the check reads the warp's vtype.vsew rather than the width
// encoded in the instruction — confirm this restriction is intended.
if (warp.vtype.vsew != 8) {
std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
std::abort();
}
WordI stride = warp.vtype.vsew / 8;
// Loads ceil(vl / 8) bytes (one mask bit per element); the final `true` is
// passed as vmask, which makes isMasked() report no element as masked off.
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
break;
}
default:
std::cout << "Load vector - unsupported lumop: " << lumop << std::endl;
std::abort();
}
break;
}
case 0b10: { // strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v
// vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v
// vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v
// vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v
// vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v
// vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v
// vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v
// vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v
auto rsrc1 = instr.getRSrc(1);
// Shadows the outer `rdest`; both are initialised from instr.getRDest().
auto rdest = instr.getRDest();
// The byte stride comes from integer register rsrc1 in ireg_file.at(0)
// (presumably thread 0's register file — confirm the ireg_file layout).
WordI stride = warp.ireg_file.at(0).at(rsrc1);
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v
// vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v
// vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v
// vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v
// vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v
// vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v
// vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v
// vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v
case 0b11: { // indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v
// vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v
// vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v
// vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v
// vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v
// vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
// Ordered and unordered indexed loads share one implementation. The data
// width is the warp's vsew; the index width is the one encoded in the
// instruction (instr.getVsew()); source 1 names the index vector register.
uint32_t nfields = instr.getVnf() + 1;
vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
break;
}
default:
std::cout << "Load vector - unsupported mop: " << mop << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, std::vector<reg_data_t[3]> &rsdata, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
@ -1364,7 +1266,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
uint32_t nfields_strided = strided ? nfields : 1;
Word mem_addr = rsdata[0][0].i + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
DP(1, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DP(4, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
}
}
@ -1417,7 +1319,7 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
Word mem_addr = rsdata[0][0].i + offset + (i % nfields) * sizeof(DT);
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
DP(1, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DP(4, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
}
}
@ -1442,97 +1344,16 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
}
}
void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
auto &warp = warps_.at(wid);
auto vmask = instr.getVmask();
auto mop = instr.getVmop();
switch (mop) {
case 0b00: { // unit-stride
auto vs3 = instr.getRSrc(1);
auto sumop = instr.getVumop();
WordI stride = warp.vtype.vsew / 8;
switch (sumop) {
case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v
uint32_t nreg = instr.getVnf() + 1;
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl;
std::abort();
}
DP(1, "Whole vector register store with nreg: " << nreg);
uint32_t vl = nreg * VLEN / 8;
vector_op_vix_store<uint8_t>(warp.vreg_file, this, rsdata, vs3, vl, false, stride, 1, 0, vmask);
break;
}
case 0b1011: { // vsm.v
if (warp.vtype.vsew != 8) {
std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
std::abort();
}
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
break;
}
default:
std::cout << "Store vector - unsupported sumop: " << sumop << std::endl;
std::abort();
}
break;
}
case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v
// vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v
// vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v
// vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v
// vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v
// vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v
// vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v
// vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v
auto rsrc1 = instr.getRSrc(1);
auto vs3 = instr.getRSrc(2);
WordI stride = warp.ireg_file.at(0).at(rsrc1);
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v
// vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v
// vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v
// vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v
// vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v
// vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v
// vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v
// vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v
case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v
// vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v
// vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v
// vsoxseg4ei8.v, vsoxseg4ei16.v, vsoxseg4ei32.v, vsoxseg4ei64.v
// vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v
// vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
uint32_t nfields = instr.getVnf() + 1;
vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
break;
}
default:
std::cout << "Store vector - unsupported mop: " << mop << std::endl;
std::abort();
}
}
// Applies a three-operand element-wise operation OP across the first `vl`
// elements of a vector register, with a scalar/immediate first operand.
//
// For each element index i that is not masked off:
//   vreg[rdest][i] = OP<DT, DT>::apply(first, vreg[rsrc0][i], vreg[rdest][i])
// The previous destination value is passed as the third operand.
//
//   OP     : operation policy exposing static apply() and name()
//   DT     : element type used to view the register bytes
//   first  : scalar operand, the same for every element
//   vmask  : forwarded to isMasked() together with mask register 0; masked-off
//            elements are skipped and their destination is left untouched
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask)
{
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask)) continue;
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DT third = getVregData<DT>(vreg_file, rdest, i);
DT result = OP<DT, DT>::apply(first, second, third);
// Unary + makes byte-sized element types print as numbers, not characters.
// NOTE(review): the same trace is emitted at verbosity 1 and again at 4 —
// this looks like the old and new lines of a diff; confirm which to keep.
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -1557,11 +1378,11 @@ void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl)
{
for (uint32_t i = 0; i < vl; i++) {
for (uint32_t i = 0; i < vl; i++) {
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool third = !isMasked(vreg_file, 0, i, false);
DT result = OP<DT, DT>::apply(first, second, third);
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -1586,11 +1407,11 @@ void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, u
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask)
{
for (uint32_t i = 0; i < vl; i++) {
for (uint32_t i = 0; i < vl; i++) {
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
bool result = OP<DT, DTR>::apply(first, second, third);
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
@ -1621,7 +1442,7 @@ void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
{
for (uint32_t i = 0; i < vl; i++) {
DT result = isMasked(vreg_file, 0, i, vmask) ? getVregData<DT>(vreg_file, rsrc0, i) : first;
DP(1, "Merge - Choosing result: " << +result);
DP(4, "Merge - Choosing result: " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -1673,7 +1494,7 @@ void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DT, DTR>::apply(first, second, third);
DP(1, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -1716,7 +1537,7 @@ void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(1, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -1744,7 +1565,7 @@ void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uin
DT second = getVregData<DTR>(vreg_file, rsrc0, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(1, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -1854,7 +1675,7 @@ void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uin
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool result = OP<DT, bool>::apply(first, second, 0);
DP(1, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
@ -1889,7 +1710,7 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
// If scalar set is set this means we have a v(f)slide1up or v(f)slide1down instruction,
// so first is our scalar value and we need to overwrite it with 1 for later computations
if (scalar && vl && !isMasked(vreg_file, 0, scalarPos, vmask)) {
DP(1, "Slide - Moving scalar value " << +first << " to position " << +scalarPos);
DP(4, "Slide - Moving scalar value " << +first << " to position " << +scalarPos);
getVregData<DT>(vreg_file, rdest, scalarPos) = first;
}
first = scalar ? 1 : first;
@ -1899,7 +1720,7 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
__uint128_t iSrc = slideDown ? (__uint128_t)i + (__uint128_t)first : (__uint128_t)i - (__uint128_t)first; // prevent overflows/underflows
DT value = (!slideDown || iSrc < VLMAX) ? getVregData<DT>(vreg_file, rsrc0, iSrc) : 0;
DP(1, "Slide - Moving value " << +value << " from position " << (uint64_t)iSrc << " to position " << +i);
DP(4, "Slide - Moving value " << +value << " from position " << (uint64_t)iSrc << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
@ -1928,7 +1749,7 @@ void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file,
if (isMasked(vreg_file, 0, i, vmask)) continue;
DT value = first < VLMAX ? getVregData<DT>(vreg_file, rsrc0, first) : 0;
DP(1, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
@ -1960,7 +1781,7 @@ void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uin
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DT third = getVregData<DT>(vreg_file, rdest, i);
DT result = OP<DT, DT>::apply(first, second, third);
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -1990,7 +1811,7 @@ void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool third = !isMasked(vreg_file, 0, i, false);
DT result = OP<DT, DT>::apply(first, second, third);
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -2020,7 +1841,7 @@ void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
bool result = OP<DT, DTR>::apply(first, second, third);
DP(1, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
@ -2052,7 +1873,7 @@ void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
for (uint32_t i = 0; i < vl; i++) {
uint32_t rsrc = isMasked(vreg_file, 0, i, vmask) ? rsrc1 : rsrc0;
DT result = getVregData<DT>(vreg_file, rsrc, i);
DP(1, "Merge - Choosing result: " << +result);
DP(4, "Merge - Choosing result: " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
@ -2082,7 +1903,7 @@ void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
uint32_t first = ei16 ? getVregData<uint16_t>(vreg_file, rsrc0, i) : getVregData<DT>(vreg_file, rsrc0, i);
DT value = first < VLMAX ? getVregData<DT>(vreg_file, rsrc1, first) : 0;
DP(1, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
@ -2114,7 +1935,7 @@ void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DT, DTR>::apply(first, second, third);
DP(1, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -2144,7 +1965,7 @@ void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DTR, DTR>::apply(first, second, third);
DP(1, "Widening wv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, "Widening wv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -2174,7 +1995,7 @@ void vector_op_vv_wfv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DTR, DTR>::apply(rv_ftod(first), second, third);
DP(1, "Widening wfv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
DP(4, "Widening wfv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -2199,7 +2020,7 @@ void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
DTR first = getVregData<DTR>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(1, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -2228,7 +2049,7 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
DT first = getVregData<DTR>(vreg_file, rsrc0, i);
DT second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(1, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
@ -2280,9 +2101,9 @@ void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
DT first = getVregData<DT>(vreg_file, rdest, 0);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DT result = OP<DT, DT>::apply(first, second, 0);
DP(1, "Reduction " << (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Reduction " << (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, 0) = result;
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
@ -2316,9 +2137,9 @@ void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR second_w = std::is_signed<DT>() ? sext((DTR) second, sizeof(DT) * 8) : zext((DTR) second, sizeof(DT) * 8);
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
DP(1, "Widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
DP(4, "Widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, 0) = result;
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
@ -2350,9 +2171,9 @@ void vector_op_vv_red_wf(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR second_w = rv_ftod(second);
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
DP(1, "Float widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
DP(4, "Float widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, 0) = result;
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
@ -2372,9 +2193,9 @@ void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, ui
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask)) continue;
DP(1, "Element Index = " << +i);
DP(4, "Element Index = " << +i);
getVregData<DT>(vreg_file, rdest, i) = i;
}
}
}
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask)
@ -2402,7 +2223,7 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool result = OP<DT, bool>::apply(first, second, 0);
DP(1, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
@ -2437,7 +2258,7 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
uint8_t secondMask = getVregData<uint8_t>(vreg_file, rsrc1, i / 8);
bool second = (secondMask >> (i % 8)) & 0x1;
bool result = OP<uint8_t, uint8_t>::apply(first, second, 0) & 0x1;
DP(1, "Compare mask bits " << (OP<uint8_t, uint8_t>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
DP(4, "Compare mask bits " << (OP<uint8_t, uint8_t>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
@ -2456,7 +2277,7 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
if (isMasked(vreg_file, rsrc0, i, 0)) continue;
DT value = getVregData<DT>(vreg_file, rsrc1, i);
DP(1, "Compression - Moving value " << +value << " from position " << i << " to position " << currPos);
DP(4, "Compression - Moving value " << +value << " from position " << i << " to position " << currPos);
getVregData<DT>(vreg_file, rdest, currPos) = value;
currPos++;
}
@ -2479,6 +2300,185 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
}
}
// Executes a vector load instruction for warp `wid`.
//
// The addressing mode is selected by the decoded `mop` field
// (instr.getVmop()); unit-stride loads are further selected by `lumop`
// (instr.getVumop()). Each supported encoding forwards the warp's vector
// register file, the destination register and `rsdata` to
// vector_op_vix_load (unit-stride / strided) or vector_op_vv_load (indexed).
//
// Parameters:
//   instr  - decoded instruction providing mop, lumop, nf, vmask, registers.
//   wid    - index into warps_ (bounds-checked via at()).
//   rsdata - per-thread source register data, passed through to the helpers.
//
// Unsupported mop/lumop values and reserved whole-register counts print a
// diagnostic to stdout and abort the process.
//
// NOTE(review): unit-stride, mask and strided loads take the element width
// from warp.vtype.vsew, whereas the whole-register and indexed paths use
// instr.getVsew(); confirm this difference is intended.
void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
  auto &warp = warps_.at(wid);
  auto vmask = instr.getVmask();
  auto rdest = instr.getRDest();
  auto mop = instr.getVmop();
  switch (mop) {
  case 0b00: { // unit-stride
    auto lumop = instr.getVumop();
    switch (lumop) {
    case 0b10000: // fault-only-first: vle{8,16,32,64}ff.v and
                  // vlseg<nf>e{8,16,32,64}ff.v for nf = 2..8.
                  // Exceptions are not supported, so these are treated like
                  // regular unit-stride loads (deliberate fall-through).
    case 0b0000: { // vle{8,16,32,64}.v and vlseg<nf>e{8,16,32,64}.v for nf = 2..8
      WordI stride = warp.vtype.vsew / 8; // element size in bytes
      uint32_t nfields = instr.getVnf() + 1;
      vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
      break;
    }
    case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v
      // For whole-register loads the nf field encodes the register count.
      uint32_t nreg = instr.getVnf() + 1;
      if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
        std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl;
        std::abort();
      }
      DP(4, "Whole vector register load with nreg: " << nreg);
      // Element count covering nreg full registers at the instruction's width.
      uint32_t vl = nreg * VLEN / instr.getVsew();
      WordI stride = instr.getVsew() / 8;
      vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask);
      break;
    }
    case 0b1011: { // vlm.v
      if (warp.vtype.vsew != 8) {
        std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
        std::abort();
      }
      WordI stride = warp.vtype.vsew / 8;
      // One mask bit per element: load ceil(vl / 8) bytes; the vmask argument
      // is passed as true here rather than taken from the instruction.
      vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
      break;
    }
    default:
      std::cout << "Load vector - unsupported lumop: " << lumop << std::endl;
      std::abort();
    }
    break;
  }
  case 0b10: { // strided: vlse{8,16,32,64}.v and vlsseg<nf>e{8,16,32,64}.v for nf = 2..8
    auto rsrc1 = instr.getRSrc(1);
    // The byte stride is read from thread 0's copy of integer register rsrc1.
    WordI stride = warp.ireg_file.at(0).at(rsrc1);
    uint32_t nfields = instr.getVnf() + 1;
    vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
    break;
  }
  case 0b01: // indexed-unordered: vluxei{8,16,32,64}.v and
             // vluxseg<nf>ei{8,16,32,64}.v for nf = 2..8
  case 0b11: { // indexed-ordered: vloxei{8,16,32,64}.v and
               // vloxseg<nf>ei{8,16,32,64}.v for nf = 2..8
    // Both orderings share one implementation. warp.vtype.vsew and
    // instr.getVsew() are passed as two separate widths.
    uint32_t nfields = instr.getVnf() + 1;
    vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
    break;
  }
  default:
    std::cout << "Load vector - unsupported mop: " << mop << std::endl;
    std::abort();
  }
}
void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
auto &warp = warps_.at(wid);
auto vmask = instr.getVmask();
auto mop = instr.getVmop();
switch (mop) {
case 0b00: { // unit-stride
auto vs3 = instr.getRSrc(1);
auto sumop = instr.getVumop();
WordI stride = warp.vtype.vsew / 8;
switch (sumop) {
case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v
uint32_t nreg = instr.getVnf() + 1;
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl;
std::abort();
}
DP(4, "Whole vector register store with nreg: " << nreg);
uint32_t vl = nreg * VLEN / 8;
vector_op_vix_store<uint8_t>(warp.vreg_file, this, rsdata, vs3, vl, false, stride, 1, 0, vmask);
break;
}
case 0b1011: { // vsm.v
if (warp.vtype.vsew != 8) {
std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl;
std::abort();
}
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true);
break;
}
default:
std::cout << "Store vector - unsupported sumop: " << sumop << std::endl;
std::abort();
}
break;
}
case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v
// vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v
// vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v
// vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v
// vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v
// vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v
// vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v
// vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v
auto rsrc1 = instr.getRSrc(1);
auto vs3 = instr.getRSrc(2);
WordI stride = warp.ireg_file.at(0).at(rsrc1);
uint32_t nfields = instr.getVnf() + 1;
vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask);
break;
}
case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v
// vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v
// vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v
// vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v
// vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v
// vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v
// vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v
// vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v
case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v
// vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v
// vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v
// vsoxseg4ei8.v, vsoxseg4ei16.v, vsoxseg4ei32.v, vsoxseg4ei64.v
// vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v
// vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
uint32_t nfields = instr.getVnf() + 1;
vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask);
break;
}
default:
std::cout << "Store vector - unsupported mop: " << mop << std::endl;
std::abort();
}
}
void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata) {
auto &warp = warps_.at(wid);
auto func3 = instr.getFunc3();
@ -2491,10 +2491,10 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
auto uimmsrc = (Word)instr.getImm();
auto vmask = instr.getVmask();
auto num_threads = arch_.num_threads();
switch (func3) {
case 0: { // vector - vector
switch (func6) {
switch (func6) {
case 0: { // vadd.vv
for (uint32_t t = 0; t < num_threads; ++t) {
if (!warp.tmask.test(t)) continue;
@ -2769,7 +2769,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
default:
std::cout << "Unrecognised vector - vector instruction func3: " << func3 << " func6: " << func6 << std::endl;
std::abort();
}
}
} break;
case 1: { // float vector - vector
switch (func6) {
@ -2839,7 +2839,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
if (!warp.tmask.test(t)) continue;
auto &dest = rddata[t].u64;
vector_op_scalar(dest, warp.vreg_file, rsrc0, rsrc1, warp.vtype.vsew);
DP(1, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
DP(4, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
}
} break;
case 18: {
@ -3107,7 +3107,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
if (!warp.tmask.test(t)) continue;
auto &dest = rddata[t].i;
vector_op_scalar(dest, warp.vreg_file, rsrc0, rsrc1, warp.vtype.vsew);
DP(1, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
DP(4, "Moved " << +dest << " from: " << +rsrc1 << " to: " << +rdest);
}
} break;
case 18: { // vzext.vf8, vsext.vf8, vzext.vf4, vsext.vf4, vzext.vf2, vsext.vf2
@ -4438,7 +4438,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
uint32_t vsew = instr.getVsew();
uint32_t vlmul = instr.getVlmul();
if(!instr.hasZimm()){ // vsetvl
if (!instr.hasZimm()) { // vsetvl
uint32_t zimm = rsdata[0][1].u;
vlmul = zimm & mask_v_lmul;
vsewO = (zimm >> shift_v_sew) & mask_v_sew;
@ -4459,7 +4459,7 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
s0 = rsdata[0][0].u;
}
DP(1, "Vset(i)vl(i) - vill: " << +warp.vtype.vill << " vma: " << vma << " vta: " << vta << " lmul: " << vlmul << " sew: " << vsew << " s0: " << s0 << " VLMAX: " << warp.VLMAX);
DP(4, "Vset(i)vl(i) - vill: " << +warp.vtype.vill << " vma: " << vma << " vta: " << vta << " lmul: " << vlmul << " sew: " << vsew << " s0: " << s0 << " VLMAX: " << warp.VLMAX);
warp.vl = std::min(s0, warp.VLMAX);
if (warp.vtype.vill) {
@ -4490,4 +4490,4 @@ void Emulator::executeVector(const Instr &instr, uint32_t wid, std::vector<reg_d
std::cout << "Unrecognised vector instruction func3: " << func3 << " func6: " << func6 << std::endl;
std::abort();
}
}
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,8 +17,8 @@
namespace vortex {
enum class Opcode {
NONE = 0,
enum class Opcode {
NONE = 0,
R = 0x33,
L = 0x3,
I = 0x13,
@ -38,11 +38,11 @@ enum class Opcode {
FMADD = 0x43,
FMSUB = 0x47,
FMNMSUB = 0x4b,
FMNMADD = 0x4f,
FMNMADD = 0x4f,
// RV64 Standard Extension
R_W = 0x3b,
I_W = 0x1b,
// Vector Extension
// Vector Extension
VSET = 0x57,
// Custom Extensions
EXT1 = 0x0b,
@ -52,37 +52,84 @@ enum class Opcode {
};
enum class InstType {
R,
I,
S,
B,
U,
R,
I,
S,
B,
U,
J,
V,
R4
};
enum set_vuse_mask {
set_func3 = (1 << 0),
set_func6 = (1 << 1),
set_imm = (1 << 2),
set_vlswidth = (1 << 3),
set_vmop = (1 << 4),
set_vumop = (1 << 5),
set_vnf = (1 << 6),
set_vmask = (1 << 7),
set_vs3 = (1 << 8),
set_zimm = (1 << 9),
set_vlmul = (1 << 10),
set_vsew = (1 << 11),
set_vta = (1 << 12),
set_vma = (1 << 13),
set_vediv = (1 << 14)
// Bit-field geometry for instruction decoding: field widths in bits, field
// shift amounts (position of each field's least-significant bit), and
// right-aligned masks of the form (1 << width) - 1.
enum DecodeConstants {
  // ---- field widths, in bits ----
  width_opcode = 7,
  width_reg    = 5,
  width_func2  = 2,
  width_func3  = 3,
  width_func6  = 6,
  width_func7  = 7,
  width_mop    = 3,
  width_vmask  = 1,
  width_i_imm  = 12,
  width_j_imm  = 20,
  width_v_zimm = 11,
  width_v_ma   = 1,
  width_v_ta   = 1,
  width_v_sew  = 3,
  width_v_lmul = 3,
  width_aq     = 1,
  width_rl     = 1,

  // ---- field positions within the instruction word ----
  shift_opcode = 0,
  shift_rd     = width_opcode,              // 7
  shift_func3  = shift_rd + width_reg,      // 12
  shift_rs1    = shift_func3 + width_func3, // 15
  shift_rs2    = shift_rs1 + width_reg,     // 20
  shift_func2  = shift_rs2 + width_reg,     // 25
  shift_func7  = shift_rs2 + width_reg,     // 25 (same position as func2)
  shift_rs3    = shift_func7 + width_func2, // 27
  shift_vmop   = shift_func7 + width_vmask, // 26
  shift_vnf    = shift_vmop + width_mop,    // 29
  shift_func6  = shift_func7 + width_vmask, // 26
  shift_vset   = shift_func7 + width_func6, // 31

  // ---- sub-field positions within the vector zimm value (lmul at bit 0) ----
  shift_v_sew  = width_v_lmul,              // 3
  shift_v_ta   = shift_v_sew + width_v_sew, // 6
  shift_v_ma   = shift_v_ta + width_v_ta,   // 7

  // ---- right-aligned field masks ----
  mask_opcode  = (1 << width_opcode) - 1,
  mask_reg     = (1 << width_reg) - 1,
  mask_func2   = (1 << width_func2) - 1,
  mask_func3   = (1 << width_func3) - 1,
  mask_func6   = (1 << width_func6) - 1,
  mask_func7   = (1 << width_func7) - 1,
  mask_i_imm   = (1 << width_i_imm) - 1,
  mask_j_imm   = (1 << width_j_imm) - 1,
  mask_v_zimm  = (1 << width_v_zimm) - 1,
  mask_v_ma    = (1 << width_v_ma) - 1,
  mask_v_ta    = (1 << width_v_ta) - 1,
  mask_v_sew   = (1 << width_v_sew) - 1,
  mask_v_lmul  = (1 << width_v_lmul) - 1,
};
// One flag bit per optional vector attribute of an instruction. The
// numbering starts at bit 3; bits 0-2 are not assigned here.
enum VectorAttrMask {
  vattr_vlswidth = (1 << 3),  // 0x0008
  vattr_vmop     = (1 << 4),  // 0x0010
  vattr_vumop    = (1 << 5),  // 0x0020
  vattr_vnf      = (1 << 6),  // 0x0040
  vattr_vmask    = (1 << 7),  // 0x0080
  vattr_vs3      = (1 << 8),  // 0x0100
  vattr_zimm     = (1 << 9),  // 0x0200
  vattr_vlmul    = (1 << 10), // 0x0400
  vattr_vsew     = (1 << 11), // 0x0800
  vattr_vta      = (1 << 12), // 0x1000
  vattr_vma      = (1 << 13), // 0x2000
  vattr_vediv    = (1 << 14)  // 0x4000
};
class Instr {
public:
Instr()
Instr()
: opcode_(Opcode::NONE)
, num_rsrcs_(0)
, has_imm_(false)
@ -105,60 +152,72 @@ public:
, vta_(0)
, vma_(0)
, vediv_(0)
, _vusemask(0)
, _is_vec(false) {
, vattr_mask_(0) {
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
rsrc_type_[i] = RegType::None;
rsrc_[i] = 0;
}
}
void setOpcode(Opcode opcode) { opcode_ = opcode; }
void setDestReg(uint32_t destReg, RegType type) {
rdest_type_ = type;
rdest_ = destReg;
void setOpcode(Opcode opcode) {
opcode_ = opcode;
}
void addSrcReg(uint32_t srcReg, RegType type) {
rsrc_type_[num_rsrcs_] = type;
rsrc_[num_rsrcs_] = srcReg;
void setDestReg(uint32_t destReg, RegType type) {
rdest_type_ = type;
rdest_ = destReg;
}
void addSrcReg(uint32_t srcReg, RegType type) {
rsrc_type_[num_rsrcs_] = type;
rsrc_[num_rsrcs_] = srcReg;
++num_rsrcs_;
}
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
rsrc_type_[index] = type;
rsrc_[index] = srcReg;
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
rsrc_type_[index] = type;
rsrc_[index] = srcReg;
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
}
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
void setFunc2(uint32_t func2) { func2_ = func2; }
void setFunc3(uint32_t func3) { func3_ = func3; _vusemask |= set_func3; }
void setFunc6(uint32_t func6) { func6_ = func6; _vusemask |= set_func6; }
void setFunc3(uint32_t func3) { func3_ = func3; }
void setFunc6(uint32_t func6) { func6_ = func6; }
void setFunc7(uint32_t func7) { func7_ = func7; }
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; _vusemask |= set_imm; }
void setVlsWidth(uint32_t width) { vlsWidth_ = width; _vusemask |= set_vlswidth; }
void setVmop(uint32_t mop) { vMop_ = mop; _vusemask |= set_vmop; }
void setVumop(uint32_t umop) { vUmop_ = umop; _vusemask |= set_vumop; }
void setVnf(uint32_t nf) { vNf_ = nf; _vusemask |= set_vnf; }
void setVmask(uint32_t mask) { vmask_ = mask; _vusemask |= set_vmask; }
void setVs3(uint32_t vs) { vs3_ = vs; _vusemask |= set_vs3; }
void setZimm(bool has_zimm) { has_zimm_ = has_zimm; _vusemask |= set_zimm; }
void setVlmul(uint32_t lmul) { vlmul_ = lmul; _vusemask |= set_vlmul; }
void setVsew(uint32_t sew) { vsew_ = sew; _vusemask |= set_vsew; }
void setVta(uint32_t vta) { vta_ = vta; _vusemask |= set_vta; }
void setVma(uint32_t vma) { vma_ = vma; _vusemask |= set_vma; }
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; _vusemask |= set_vediv; }
void setVec(bool is_vec) { _is_vec = is_vec; }
// Attributes for Vector instructions
void setVlsWidth(uint32_t width) { vlsWidth_ = width; vattr_mask_ |= vattr_vlswidth; }
void setVmop(uint32_t mop) { vMop_ = mop; vattr_mask_ |= vattr_vmop; }
void setVumop(uint32_t umop) { vUmop_ = umop; vattr_mask_ |= vattr_vumop; }
void setVnf(uint32_t nf) { vNf_ = nf; vattr_mask_ |= vattr_vnf; }
void setVmask(uint32_t mask) { vmask_ = mask; vattr_mask_ |= vattr_vmask; }
void setVs3(uint32_t vs) { vs3_ = vs; vattr_mask_ |= vattr_vs3; }
void setZimm(bool has_zimm) { has_zimm_ = has_zimm; vattr_mask_ |= vattr_zimm; }
void setVlmul(uint32_t lmul) { vlmul_ = lmul; vattr_mask_ |= vattr_vlmul; }
void setVsew(uint32_t sew) { vsew_ = sew; vattr_mask_ |= vattr_vsew; }
void setVta(uint32_t vta) { vta_ = vta; vattr_mask_ |= vattr_vta; }
void setVma(uint32_t vma) { vma_ = vma; vattr_mask_ |= vattr_vma; }
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; vattr_mask_ |= vattr_vediv; }
Opcode getOpcode() const { return opcode_; }
uint32_t getNRSrc() const { return num_rsrcs_; }
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
uint32_t getRDest() const { return rdest_; }
RegType getRDType() const { return rdest_type_; }
bool hasImm() const { return has_imm_; }
uint32_t getImm() const { return imm_; }
uint32_t getFunc2() const { return func2_; }
uint32_t getFunc3() const { return func3_; }
uint32_t getFunc6() const { return func6_; }
uint32_t getFunc7() const { return func7_; }
uint32_t getNRSrc() const { return num_rsrcs_; }
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
uint32_t getRDest() const { return rdest_; }
RegType getRDType() const { return rdest_type_; }
bool hasImm() const { return has_imm_; }
uint32_t getImm() const { return imm_; }
uint32_t getVlsWidth() const { return vlsWidth_; }
uint32_t getVmop() const { return vMop_; }
uint32_t getVumop() const { return vUmop_; }
@ -172,8 +231,7 @@ public:
uint32_t getVta() const { return vta_; }
uint32_t getVma() const { return vma_; }
uint32_t getVediv() const { return vediv_; }
uint32_t getVUseMask() const { return _vusemask; }
bool isVec() const { return _is_vec; }
uint32_t getVattrMask() const { return vattr_mask_; }
private:
@ -187,7 +245,7 @@ private:
RegType rdest_type_;
uint32_t imm_;
RegType rsrc_type_[MAX_REG_SOURCES];
uint32_t rsrc_[MAX_REG_SOURCES];
uint32_t rsrc_[MAX_REG_SOURCES];
uint32_t rdest_;
uint32_t func2_;
uint32_t func3_;
@ -207,8 +265,7 @@ private:
uint32_t vta_;
uint32_t vma_;
uint32_t vediv_;
uint32_t _vusemask;
bool _is_vec;
uint32_t vattr_mask_;
friend std::ostream &operator<<(std::ostream &, const Instr&);
};