vortex/sim/simx/vpu.h
MichaelJSr a2cfeffcfe Added ifndef statements for the vector extension anywhere they didn't exist already
Added ifndef statements for the vector extension anywhere they didn't exist already

more ifdef statements

more ifdef

Update decode.cpp

Update decode.cpp

Update decode.cpp
2025-01-14 21:29:47 -08:00

2393 lines
No EOL
83 KiB
C++

#ifdef EXT_V_ENABLE
#pragma once
using namespace vortex;
template <typename T, typename R>
class Add {
public:
static R apply(T first, T second, R) {
return (R)first + (R)second;
}
static std::string name() { return "Add"; }
};
template <typename T, typename R>
class Sub {
public:
static R apply(T first, T second, R) {
return (R)second - (R)first;
}
static std::string name() { return "Sub"; }
};
template <typename T, typename R>
class Adc {
public:
static R apply(T first, T second, R third) {
return (R)first + (R)second + third;
}
static std::string name() { return "Adc"; }
};
template <typename T, typename R>
class Madc {
public:
static R apply(T first, T second, R third) {
return ((R)first + (R)second + third) > (R)std::numeric_limits<T>::max();
}
static std::string name() { return "Madc"; }
};
template <typename T, typename R>
class Sbc {
public:
static R apply(T first, T second, R third) {
return (R)second - (R)first - third;
}
static std::string name() { return "Sbc"; }
};
template <typename T, typename R>
class Msbc {
public:
static R apply(T first, T second, R third) {
return (R)second < ((R)first + third);
}
static std::string name() { return "Msbc"; }
};
template <typename T, typename R>
class Ssub {
public:
static R apply(T first, T second, uint32_t, uint32_t &vxsat_) {
// rounding mode is not relevant for this operation
T unclippedResult = second - first;
R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits<R>::min(), (T)std::numeric_limits<R>::max());
vxsat_ |= clippedResult != unclippedResult;
return clippedResult;
}
static std::string name() { return "Ssub"; }
};
template <typename T, typename R>
class Ssubu {
public:
static R apply(T first, T second, uint32_t, uint32_t &vxsat_) {
// rounding mode is not relevant for this operation
if (first > second) {
vxsat_ = true;
return 0;
} else {
vxsat_ = false;
return second - first;
}
}
static std::string name() { return "Ssubu"; }
};
template <typename T, typename R>
class Sadd {
public:
static R apply(T first, T second, uint32_t, uint32_t &vxsat_) {
// rounding mode is not relevant for this operation
T unclippedResult = second + first;
R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits<R>::min(), (T)std::numeric_limits<R>::max());
vxsat_ |= clippedResult != unclippedResult;
return clippedResult;
}
static std::string name() { return "Sadd"; }
};
template <typename T, typename R>
class Rsub {
public:
static R apply(T first, T second, R) {
return first - second;
}
static std::string name() { return "Rsub"; }
};
template <typename T, typename R>
class Div {
public:
static R apply(T first, T second, R) {
// logic taken from scalar div
if (first == 0) {
return -1;
} else if (second == std::numeric_limits<T>::min() && first == T(-1)) {
return second;
} else {
return (R)second / (R)first;
}
}
static std::string name() { return "Div"; }
};
template <typename T, typename R>
class Rem {
public:
static R apply(T first, T second, R) {
// logic taken from scalar rem
if (first == 0) {
return second;
} else if (second == std::numeric_limits<T>::min() && first == T(-1)) {
return 0;
} else {
return (R)second % (R)first;
}
}
static std::string name() { return "Rem"; }
};
template <typename T, typename R>
class Mul {
public:
static R apply(T first, T second, R) {
return (R)first * (R)second;
}
static std::string name() { return "Mul"; }
};
template <typename T, typename R>
class Mulsu {
public:
static R apply(T first, T second, R) {
R first_ext = zext((R)first, (sizeof(T) * 8));
return first_ext * (R)second;
}
static std::string name() { return "Mulsu"; }
};
template <typename T, typename R>
class Mulh {
public:
static R apply(T first, T second, R) {
__int128_t first_ext = sext((__int128_t)first, (sizeof(T) * 8));
__int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8));
return (first_ext * second_ext) >> (sizeof(T) * 8);
}
static std::string name() { return "Mulh"; }
};
template <typename T, typename R>
class Mulhsu {
public:
static R apply(T first, T second, R) {
__int128_t first_ext = zext((__int128_t)first, (sizeof(T) * 8));
__int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8));
return (first_ext * second_ext) >> (sizeof(T) * 8);
}
static std::string name() { return "Mulhsu"; }
};
template <typename T, typename R>
class Mulhu {
public:
static R apply(T first, T second, R) {
return ((__uint128_t)first * (__uint128_t)second) >> (sizeof(T) * 8);
}
static std::string name() { return "Mulhu"; }
};
template <typename T, typename R>
class Madd {
public:
static R apply(T first, T second, R third) {
return ((R)first * third) + (R)second;
}
static std::string name() { return "Madd"; }
};
template <typename T, typename R>
class Nmsac {
public:
static R apply(T first, T second, R third) {
return -((R)first * (R)second) + third;
}
static std::string name() { return "Nmsac"; }
};
template <typename T, typename R>
class Macc {
public:
static R apply(T first, T second, R third) {
return ((R)first * (R)second) + third;
}
static std::string name() { return "Macc"; }
};
template <typename T, typename R>
class Maccsu {
public:
static R apply(T first, T second, R third) {
R first_ext = sext((R)first, (sizeof(T) * 8));
R second_ext = zext((R)second, (sizeof(T) * 8));
return (first_ext * second_ext) + third;
}
static std::string name() { return "Maccsu"; }
};
template <typename T, typename R>
class Maccus {
public:
static R apply(T first, T second, R third) {
R first_ext = zext((R)first, (sizeof(T) * 8));
R second_ext = sext((R)second, (sizeof(T) * 8));
return (first_ext * second_ext) + third;
}
static std::string name() { return "Maccus"; }
};
template <typename T, typename R>
class Nmsub {
public:
static R apply(T first, T second, R third) {
return -((R)first * third) + (R)second;
}
static std::string name() { return "Nmsub"; }
};
template <typename T, typename R>
class Min {
public:
static R apply(T first, T second, R) {
return std::min(first, second);
}
static std::string name() { return "Min"; }
};
template <typename T, typename R>
class Max {
public:
static R apply(T first, T second, R) {
return std::max(first, second);
}
static std::string name() { return "Max"; }
};
template <typename T, typename R>
class And {
public:
static R apply(T first, T second, R) {
return first & second;
}
static std::string name() { return "And"; }
};
template <typename T, typename R>
class Or {
public:
static R apply(T first, T second, R) {
return first | second;
}
static std::string name() { return "Or"; }
};
template <typename T, typename R>
class Xor {
public:
static R apply(T first, T second, R) {
return first ^ second;
}
static std::string name() { return "Xor"; }
};
template <typename T, typename R>
class Sll {
public:
static R apply(T first, T second, R) {
// Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount.
return second << (first & (sizeof(T) * 8 - 1));
}
static std::string name() { return "Sll"; }
};
template <typename T, typename R>
bool bitAt(T value, R pos, R negOffset) {
R offsetPos = pos - negOffset;
return pos >= negOffset && ((value >> offsetPos) & 0x1);
}
template <typename T, typename R>
bool anyBitUpTo(T value, R to, R negOffset) {
R offsetTo = to - negOffset;
return to >= negOffset && (value & (((R)1 << (offsetTo + 1)) - 1));
}
template <typename T, typename R>
bool roundBit(T value, R shiftDown, uint32_t vxrm) {
switch (vxrm) {
case 0: // round-to-nearest-up
return bitAt(value, shiftDown, (R)1);
case 1: // round-to-nearest-even
return bitAt(value, shiftDown, (R)1) && (anyBitUpTo(value, shiftDown, (R)2) || bitAt(value, shiftDown, (R)0));
case 2: // round-down (truncate)
return 0;
case 3: // round-to-odd
return !bitAt(value, shiftDown, (R)0) && anyBitUpTo(value, shiftDown, (R)1);
default:
std::cout << "Roundoff - invalid value for vxrm: " << vxrm << std::endl;
std::abort();
}
}
template <typename T, typename R>
class SrlSra {
public:
static R apply(T first, T second, R) {
// Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount.
return second >> (first & (sizeof(T) * 8 - 1));
}
static R apply(T first, T second, uint32_t vxrm, uint32_t) {
// Saturation is not relevant for this operation
// Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount.
T firstValid = first & (sizeof(T) * 8 - 1);
return apply(firstValid, second, 0) + roundBit(second, firstValid, vxrm);
}
static std::string name() { return "SrlSra"; }
};
template <typename T, typename R>
class Aadd {
public:
static R apply(T first, T second, uint32_t vxrm, uint32_t) {
// Saturation is not relevant for this operation
T sum = second + first;
return (sum >> 1) + roundBit(sum, 1, vxrm);
}
static std::string name() { return "Aadd"; }
};
template <typename T, typename R>
class Asub {
public:
static R apply(T first, T second, uint32_t vxrm, uint32_t) {
// Saturation is not relevant for this operation
T difference = second - first;
return (difference >> 1) + roundBit(difference, 1, vxrm);
}
static std::string name() { return "Asub"; }
};
template <typename T, typename R>
class Eq {
public:
static R apply(T first, T second, R) {
return first == second;
}
static std::string name() { return "Eq"; }
};
template <typename T, typename R>
class Ne {
public:
static R apply(T first, T second, R) {
return first != second;
}
static std::string name() { return "Ne"; }
};
template <typename T, typename R>
class Lt {
public:
static R apply(T first, T second, R) {
return first > second;
}
static std::string name() { return "Lt"; }
};
template <typename T, typename R>
class Le {
public:
static R apply(T first, T second, R) {
return first >= second;
}
static std::string name() { return "Le"; }
};
template <typename T, typename R>
class Gt {
public:
static R apply(T first, T second, R) {
return first < second;
}
static std::string name() { return "Gt"; }
};
template <typename T, typename R>
class AndNot {
public:
static R apply(T first, T second, R) {
return second & ~first;
}
static std::string name() { return "AndNot"; }
};
template <typename T, typename R>
class OrNot {
public:
static R apply(T first, T second, R) {
return second | ~first;
}
static std::string name() { return "OrNot"; }
};
template <typename T, typename R>
class Nand {
public:
static R apply(T first, T second, R) {
return ~(second & first);
}
static std::string name() { return "Nand"; }
};
template <typename T, typename R>
class Mv {
public:
static R apply(T first, T, R) {
return first;
}
static std::string name() { return "Mv"; }
};
template <typename T, typename R>
class Nor {
public:
static R apply(T first, T second, R) {
return ~(second | first);
}
static std::string name() { return "Nor"; }
};
template <typename T, typename R>
class Xnor {
public:
static R apply(T first, T second, R) {
return ~(second ^ first);
}
static std::string name() { return "Xnor"; }
};
template <typename T, typename R>
class Fadd {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fadd_s(first, second, frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fadd_d(first_d, second_d, frm, &fflags);
} else {
std::cout << "Fadd only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fadd"; }
};
template <typename T, typename R>
class Fsub {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fsub_s(second, first, frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fsub_d(second_d, first_d, frm, &fflags);
} else {
std::cout << "Fsub only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fsub"; }
};
template <typename T, typename R>
class Fmacc {
public:
static R apply(T first, T second, R third) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fmadd_s(first, second, third, frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fmadd_d(first_d, second_d, third, frm, &fflags);
} else {
std::cout << "Fmacc only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmacc"; }
};
template <typename T, typename R>
class Fnmacc {
public:
static R apply(T first, T second, R third) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fnmadd_s(first, second, third, frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fnmadd_d(first_d, second_d, third, frm, &fflags);
} else {
std::cout << "Fnmacc only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fnmacc"; }
};
template <typename T, typename R>
class Fmsac {
public:
static R apply(T first, T second, R third) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags);
} else {
std::cout << "Fmsac only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmsac"; }
};
template <typename T, typename R>
class Fnmsac {
public:
static R apply(T first, T second, R third) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fnmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fnmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags);
} else {
std::cout << "Fnmsac only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fnmsac"; }
};
template <typename T, typename R>
class Fmadd {
public:
static R apply(T first, T second, R third) {
if (sizeof(T) == 4 || sizeof(T) == 8) {
return Fmacc<T, R>::apply(first, third, second);
} else {
std::cout << "Fmadd only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmadd"; }
};
template <typename T, typename R>
class Fnmadd {
public:
static R apply(T first, T second, R third) {
if (sizeof(T) == 4 || sizeof(T) == 8) {
return Fnmacc<T, R>::apply(first, third, second);
} else {
std::cout << "Fnmadd only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fnmadd"; }
};
template <typename T, typename R>
class Fmsub {
public:
static R apply(T first, T second, R third) {
if (sizeof(T) == 4 || sizeof(T) == 8) {
return Fmsac<T, R>::apply(first, third, second);
} else {
std::cout << "Fmsub only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmsub"; }
};
template <typename T, typename R>
class Fnmsub {
public:
static R apply(T first, T second, R third) {
if (sizeof(T) == 4 || sizeof(T) == 8) {
return Fnmsac<T, R>::apply(first, third, second);
} else {
std::cout << "Fnmsub only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fnmsub"; }
};
template <typename T, typename R>
class Fmin {
public:
static R apply(T first, T second, R) {
// ignoring rounding modes for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_fmin_s(first, second, &fflags);
} else if (sizeof(T) == 8) {
return rv_fmin_d(first, second, &fflags);
} else {
std::cout << "Fmin only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmin"; }
};
template <typename T, typename R>
class Fmax {
public:
static R apply(T first, T second, R) {
// ignoring rounding modes for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_fmax_s(first, second, &fflags);
} else if (sizeof(T) == 8) {
return rv_fmax_d(first, second, &fflags);
} else {
std::cout << "Fmax only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmax"; }
};
template <typename T, typename R>
class Fsgnj {
public:
static R apply(T first, T second, R) {
if (sizeof(T) == 4) {
return rv_fsgnj_s(second, first);
} else if (sizeof(T) == 8) {
return rv_fsgnj_d(second, first);
} else {
std::cout << "Fsgnj only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fsgnj"; }
};
template <typename T, typename R>
class Fsgnjn {
public:
static R apply(T first, T second, R) {
if (sizeof(T) == 4) {
return rv_fsgnjn_s(second, first);
} else if (sizeof(T) == 8) {
return rv_fsgnjn_d(second, first);
} else {
std::cout << "Fsgnjn only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fsgnjn"; }
};
template <typename T, typename R>
class Fsgnjx {
public:
static R apply(T first, T second, R) {
if (sizeof(T) == 4) {
return rv_fsgnjx_s(second, first);
} else if (sizeof(T) == 8) {
return rv_fsgnjx_d(second, first);
} else {
std::cout << "Fsgnjx only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fsgnjx"; }
};
template <typename T, typename R>
class Fcvt {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(T) == 4) {
switch (first) {
case 0b00000: // vfcvt.xu.f.v
return rv_ftou_s(second, frm, &fflags);
case 0b00001: // vfcvt.x.f.v
return rv_ftoi_s(second, frm, &fflags);
case 0b00010: // vfcvt.f.xu.v
return rv_utof_s(second, frm, &fflags);
case 0b00011: // vfcvt.f.x.v
return rv_itof_s(second, frm, &fflags);
case 0b00110: // vfcvt.rtz.xu.f.v
return rv_ftou_s(second, 1, &fflags);
case 0b00111: // vfcvt.rtz.x.f.v
return rv_ftoi_s(second, 1, &fflags);
case 0b01000: // vfwcvt.xu.f.v
return rv_ftolu_s(second, frm, &fflags);
case 0b01001: // vfwcvt.x.f.v
return rv_ftol_s(second, frm, &fflags);
case 0b01010: // vfwcvt.f.xu.v
return rv_utof_d(second, frm, &fflags);
case 0b01011: // vfwcvt.f.x.v
return rv_itof_d(second, frm, &fflags);
case 0b01100: // vfwcvt.f.f.v
return rv_ftod(second);
case 0b01110: // vfwcvt.rtz.xu.f.v
return rv_ftolu_s(second, 1, &fflags);
case 0b01111: // vfwcvt.rtz.x.f.v
return rv_ftol_s(second, 1, &fflags);
default:
std::cout << "Fcvt has unsupported value for first: " << first << std::endl;
std::abort();
}
} else if (sizeof(T) == 8) {
switch (first) {
case 0b00000: // vfcvt.xu.f.v
return rv_ftolu_d(second, frm, &fflags);
case 0b00001: // vfcvt.x.f.v
return rv_ftol_d(second, frm, &fflags);
case 0b00010: // vfcvt.f.xu.v
return rv_lutof_d(second, frm, &fflags);
case 0b00011: // vfcvt.f.x.v
return rv_ltof_d(second, frm, &fflags);
case 0b00110: // vfcvt.rtz.xu.f.v
return rv_ftolu_d(second, 1, &fflags);
case 0b00111: // vfcvt.rtz.x.f.v
return rv_ftol_d(second, 1, &fflags);
case 0b01000: // vfwcvt.xu.f.v
case 0b01001: // vfwcvt.x.f.v
case 0b01010: // vfwcvt.f.xu.v
case 0b01011: // vfwcvt.f.x.v
case 0b01100: // vfwcvt.f.f.v
case 0b01110: // vfwcvt.rtz.xu.f.v
case 0b01111: // vfwcvt.rtz.x.f.v
std::cout << "Fwcvt only supports f32" << std::endl;
std::abort();
default:
std::cout << "Fcvt has unsupported value for first: " << first << std::endl;
std::abort();
}
} else {
std::cout << "Fcvt only supports f32 and f64" << std::endl;
std::abort();
}
}
static R apply(T first, T second, uint32_t vxrm, uint32_t &) { // saturation argument is unused
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 8) {
switch (first) {
case 0b10000: // vfncvt.xu.f.w
return rv_ftou_d(second, vxrm, &fflags);
case 0b10001: // vfncvt.x.f.w
return rv_ftoi_d(second, vxrm, &fflags);
case 0b10010: // vfncvt.f.xu.w
return rv_lutof_s(second, vxrm, &fflags);
case 0b10011: // vfncvt.f.x.w
return rv_ltof_s(second, vxrm, &fflags);
case 0b10100: // vfncvt.f.f.w
return rv_dtof_r(second, vxrm);
case 0b10101: // vfncvt.rod.f.f.w
return rv_dtof_r(second, 6);
case 0b10110: // vfncvt.rtz.xu.f.w
return rv_ftou_d(second, 1, &fflags);
case 0b10111: // vfncvt.rtz.x.f.w
return rv_ftoi_d(second, 1, &fflags);
default:
std::cout << "Fncvt has unsupported value for first: " << first << std::endl;
std::abort();
}
} else {
std::cout << "Fncvt only supports f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fcvt"; }
};
template <typename T, typename R>
class Funary1 {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(T) == 4) {
switch (first) {
case 0b00000: // vfsqrt.v
return rv_fsqrt_s(second, frm, &fflags);
case 0b00100: // vfrsqrt7.v
return rv_frsqrt7_s(second, frm, &fflags);
case 0b00101: // vfrec7.v
return rv_frecip7_s(second, frm, &fflags);
case 0b10000: // vfclass.v
return rv_fclss_s(second);
default:
std::cout << "Funary1 has unsupported value for first: " << first << std::endl;
std::abort();
}
} else if (sizeof(T) == 8) {
switch (first) {
case 0b00000: // vfsqrt.v
return rv_fsqrt_d(second, frm, &fflags);
case 0b00100: // vfrsqrt7.v
return rv_frsqrt7_d(second, frm, &fflags);
case 0b00101: // vfrec7.v
return rv_frecip7_d(second, frm, &fflags);
case 0b10000: // vfclass.v
return rv_fclss_d(second);
default:
std::cout << "Funary1 has unsupported value for first: " << first << std::endl;
std::abort();
}
} else {
std::cout << "Funary1 only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Funary1"; }
};
template <typename T, typename R>
class Xunary0 {
public:
static R apply(T, T second, T) {
return second;
}
static std::string name() { return "Xunary0"; }
};
template <typename T, typename R>
class Feq {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_feq_s(second, first, &fflags);
} else if (sizeof(T) == 8) {
return rv_feq_d(second, first, &fflags);
} else {
std::cout << "Feq only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Feq"; }
};
template <typename T, typename R>
class Fle {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_fle_s(second, first, &fflags);
} else if (sizeof(T) == 8) {
return rv_fle_d(second, first, &fflags);
} else {
std::cout << "Fle only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fle"; }
};
template <typename T, typename R>
class Flt {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_flt_s(second, first, &fflags);
} else if (sizeof(T) == 8) {
return rv_flt_d(second, first, &fflags);
} else {
std::cout << "Flt only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Flt"; }
};
template <typename T, typename R>
class Fne {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return !rv_feq_s(second, first, &fflags);
} else if (sizeof(T) == 8) {
return !rv_feq_d(second, first, &fflags);
} else {
std::cout << "Fne only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fne"; }
};
template <typename T, typename R>
class Fgt {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_flt_s(first, second, &fflags);
} else if (sizeof(T) == 8) {
return rv_flt_d(first, second, &fflags);
} else {
std::cout << "Fgt only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fgt"; }
};
template <typename T, typename R>
class Fge {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
if (sizeof(T) == 4) {
return rv_fle_s(first, second, &fflags);
} else if (sizeof(T) == 8) {
return rv_fle_d(first, second, &fflags);
} else {
std::cout << "Fge only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fge"; }
};
template <typename T, typename R>
class Fdiv {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(T) == 4) {
return rv_fdiv_s(second, first, frm, &fflags);
} else if (sizeof(T) == 8) {
return rv_fdiv_d(second, first, frm, &fflags);
} else {
std::cout << "Fdiv only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fdiv"; }
};
template <typename T, typename R>
class Frdiv {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(T) == 4) {
return rv_fdiv_s(first, second, frm, &fflags);
} else if (sizeof(T) == 8) {
return rv_fdiv_d(first, second, frm, &fflags);
} else {
std::cout << "Frdiv only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Frdiv"; }
};
template <typename T, typename R>
class Fmul {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(R) == 4) {
return rv_fmul_s(first, second, frm, &fflags);
} else if (sizeof(R) == 8) {
uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first);
uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second);
return rv_fmul_d(first_d, second_d, frm, &fflags);
} else {
std::cout << "Fmul only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Fmul"; }
};
template <typename T, typename R>
class Frsub {
public:
static R apply(T first, T second, R) {
// ignoring flags for now
uint32_t fflags = 0;
// ignoring rounding mode for now
uint32_t frm = 0;
if (sizeof(T) == 4) {
return rv_fsub_s(first, second, frm, &fflags);
} else if (sizeof(T) == 8) {
return rv_fsub_d(first, second, frm, &fflags);
} else {
std::cout << "Frsub only supports f32 and f64" << std::endl;
std::abort();
}
}
static std::string name() { return "Frsub"; }
};
template <typename T, typename R>
class Clip {
public:
static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) {
// The low lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the low 6 bits for a SEW=64-bit to
// SEW=32-bit narrowing operation) are used to control the right shift amount, which provides the scaling.
R firstValid = first & (sizeof(T) * 8 - 1);
T unclippedResult = (second >> firstValid) + roundBit(second, firstValid, vxrm);
R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits<R>::min(), (T)std::numeric_limits<R>::max());
vxsat_ |= clippedResult != unclippedResult;
return clippedResult;
}
static std::string name() { return "Clip"; }
};
template <typename T, typename R>
class Smul {
public:
static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) {
R shift = sizeof(R) * 8 - 1;
T unshiftedResult = first * second;
T unclippedResult = (unshiftedResult >> shift) + roundBit(unshiftedResult, shift, vxrm);
R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits<R>::min(), (T)std::numeric_limits<R>::max());
vxsat_ |= clippedResult != unclippedResult;
return clippedResult;
}
static std::string name() { return "Smul"; }
};
///////////////////////////////////////////////////////////////////////////////
bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) {
auto &mask = vreg_file.at(maskVreg);
uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
uint8_t value = (emask >> (byteI % 8)) & 0x1;
DP(4, "Masking enabled: " << +!vmask << " mask element: " << +value);
return !vmask && value == 0;
}
template <typename DT>
uint32_t getVreg(uint32_t baseVreg, uint32_t byteI) {
uint32_t vsew = sizeof(DT) * 8;
return (baseVreg + (byteI / (VLEN / vsew))) % 32;
}
template <typename DT>
DT &getVregData(std::vector<vortex::Byte> &baseVregVec, uint32_t byteI) {
uint32_t vsew = sizeof(DT) * 8;
return *(DT *)(baseVregVec.data() + (byteI % (VLEN / vsew)) * vsew / 8);
}
template <typename DT>
DT &getVregData(std::vector<std::vector<vortex::Byte>> &vreg_file, uint32_t baseVreg, uint32_t byteI) {
auto &vr1 = vreg_file.at(getVreg<DT>(baseVreg, byteI));
return getVregData<DT>(vr1, byteI);
}
template <typename DT>
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
if (nfields * emul > 8) {
std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl;
std::abort();
}
for (uint32_t i = 0; i < vl * nfields; i++) {
if (isMasked(vreg_file, 0, i / nfields, vmask))
continue;
uint32_t nfields_strided = strided ? nfields : 1;
Word mem_addr = (base_addr & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
Word mem_data = 0;
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
DP(4, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
DP(4, "Previous data: " << +result);
result = (DT)mem_data;
}
}
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_load<uint8_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
break;
case 16:
vector_op_vix_load<uint16_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
break;
case 32:
vector_op_vix_load<uint32_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
break;
case 64:
vector_op_vix_load<uint64_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
break;
default:
std::cout << "Failed to execute VLE for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
if (nfields * emul > 8) {
std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl;
std::abort();
}
for (uint32_t i = 0; i < vl * nfields; i++) {
if (isMasked(vreg_file, 0, i / nfields, vmask))
continue;
Word offset = 0;
switch (iSew) {
case 8:
offset = getVregData<uint8_t>(vreg_file, rsrc1, i / nfields);
break;
case 16:
offset = getVregData<uint16_t>(vreg_file, rsrc1, i / nfields);
break;
case 32:
offset = getVregData<uint32_t>(vreg_file, rsrc1, i / nfields);
break;
case 64:
offset = getVregData<uint64_t>(vreg_file, rsrc1, i / nfields);
break;
default:
std::cout << "Unsupported iSew: " << iSew << std::endl;
std::abort();
}
Word mem_addr = (base_addr & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT);
Word mem_data = 0;
emul_->dcache_read(&mem_data, mem_addr, vsew / 8);
DP(4, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg<DT>(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
DT &result = getVregData<DT>(vreg_file, rdest + (i % nfields) * emul, i / nfields);
DP(4, "Previous data: " << +result);
result = (DT)mem_data;
}
}
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_load<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
break;
case 16:
vector_op_vv_load<uint16_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
break;
case 32:
vector_op_vv_load<uint32_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
break;
case 64:
vector_op_vv_load<uint64_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
break;
default:
std::cout << "Failed to execute VLUX/VLOX for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
for (uint32_t i = 0; i < vl * nfields; i++) {
if (isMasked(vreg_file, 0, i / nfields, vmask))
continue;
uint32_t nfields_strided = strided ? nfields : 1;
Word mem_addr = base_addr + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT);
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
DP(4, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
}
}
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_store<uint8_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
break;
case 16:
vector_op_vix_store<uint16_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
break;
case 32:
vector_op_vix_store<uint32_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
break;
case 64:
vector_op_vix_store<uint64_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
break;
default:
std::cout << "Failed to execute VSE for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
for (uint32_t i = 0; i < vl * nfields; i++) {
if (isMasked(vreg_file, 0, i / nfields, vmask))
continue;
Word offset = 0;
switch (iSew) {
case 8:
offset = getVregData<uint8_t>(vreg_file, rsrc1, i / nfields);
break;
case 16:
offset = getVregData<uint16_t>(vreg_file, rsrc1, i / nfields);
break;
case 32:
offset = getVregData<uint32_t>(vreg_file, rsrc1, i / nfields);
break;
case 64:
offset = getVregData<uint64_t>(vreg_file, rsrc1, i / nfields);
break;
default:
std::cout << "Unsupported iSew: " << iSew << std::endl;
std::abort();
}
Word mem_addr = base_addr + offset + (i % nfields) * sizeof(DT);
Word mem_data = getVregData<DT>(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields);
DP(4, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg<DT>(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields);
emul_->dcache_write(&mem_data, mem_addr, vsew / 8);
}
}
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_store<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
break;
case 16:
vector_op_vv_store<uint16_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
break;
case 32:
vector_op_vv_store<uint32_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
break;
case 64:
vector_op_vv_store<uint64_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
break;
default:
std::cout << "Failed to execute VSUX/VSOX for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DT third = getVregData<DT>(vreg_file, rdest, i);
DT result = OP<DT, DT>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 64:
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl) {
for (uint32_t i = 0; i < vl; i++) {
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool third = !isMasked(vreg_file, 0, i, false);
DT result = OP<DT, DT>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl) {
switch (vsew) {
case 8:
vector_op_vix_carry<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl);
break;
case 16:
vector_op_vix_carry<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl);
break;
case 32:
vector_op_vix_carry<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl);
break;
case 64:
vector_op_vix_carry<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl);
break;
default:
std::cout << "Failed to execute VI/VX carry for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
bool result = OP<DT, DTR>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
getVregData<uint8_t>(vreg_file, rdest, i / 8) &= ~(1 << (i % 8));
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vix_carry_out(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_carry_out<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix_carry_out<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix_carry_out<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 64:
vector_op_vix_carry_out<OP, DT64, DT128>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX carry out for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
DT result = isMasked(vreg_file, 0, i, vmask) ? getVregData<DT>(vreg_file, rsrc0, i) : first;
DP(4, "Merge - Choosing result: " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_merge(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_merge<DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix_merge<DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix_merge<DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 64:
vector_op_vix_merge<DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_scalar(DT &dest, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t vsew) {
if (rsrc0 != 0) {
std::cout << "Vwxunary0/Vwfunary0 has unsupported value for vs2: " << rsrc0 << std::endl;
std::abort();
}
switch (vsew) {
case 8:
dest = getVregData<uint8_t>(vreg_file, rsrc1, 0);
break;
case 16:
dest = getVregData<uint16_t>(vreg_file, rsrc1, 0);
break;
case 32:
dest = getVregData<uint32_t>(vreg_file, rsrc1, 0);
break;
case 64:
dest = getVregData<uint64_t>(vreg_file, rsrc1, 0);
break;
default:
std::cout << "Failed to execute vmv.x.s/vfmv.f.s for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DT, DTR>::apply(first, second, third);
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_w<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix_w<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix_w<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX widening for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_wx(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX widening wx for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT second = getVregData<DT>(vreg_file, rsrc0, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_n(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vix_n<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vix_n<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vix_n<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VI/VX narrowing for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT second = getVregData<DTR>(vreg_file, rsrc0, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vix_sat<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vix_sat<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vix_sat<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 64:
vector_op_vix_sat<OP, DT128, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VI/VX saturating for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_scale(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vix_sat<OP, DT8, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vix_sat<OP, DT16, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vix_sat<OP, DT32, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
case 64:
vector_op_vix_sat<OP, DT64, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VI/VX scale for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP>
void vector_op_vix_ext(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 16) {
switch (src1) {
case 0b00110: // vzext.vf2
vector_op_vix_w<OP, uint8_t, uint16_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00111: // vsext.vf2
vector_op_vix_w<OP, int8_t, int16_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Xunary0 has unsupported value for vf: " << src1 << std::endl;
std::abort();
}
} else if (vsew == 32) {
switch (src1) {
case 0b00100: // vzext.vf4
vector_op_vix_w<OP, uint8_t, uint32_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00101: // vsext.vf4
vector_op_vix_w<OP, int8_t, int32_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00110: // vzext.vf2
vector_op_vix_w<OP, uint16_t, uint32_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00111: // vsext.vf2
vector_op_vix_w<OP, int16_t, int32_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Xunary0 has unsupported value for vf: " << src1 << std::endl;
std::abort();
}
} else if (vsew == 64) {
switch (src1) {
case 0b00010: // vzext.vf8
vector_op_vix_w<OP, uint8_t, uint64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00011: // vsext.vf8
vector_op_vix_w<OP, int8_t, int64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00100: // vzext.vf4
vector_op_vix_w<OP, uint16_t, uint64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00101: // vsext.vf4
vector_op_vix_w<OP, int16_t, int64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00110: // vzext.vf2
vector_op_vix_w<OP, uint32_t, uint64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 0b00111: // vsext.vf2
vector_op_vix_w<OP, int32_t, int64_t>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Xunary0 has unsupported value for vf: " << src1 << std::endl;
std::abort();
}
} else {
std::cout << "Failed to execute Xunary0 for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT second = getVregData<DT>(vreg_file, rsrc0, i);
bool result = OP<DT, bool>::apply(first, second, 0);
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
getVregData<uint8_t>(vreg_file, rdest, i / 8) &= ~(1 << (i % 8));
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_mask(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_mask<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 16:
vector_op_vix_mask<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 32:
vector_op_vix_mask<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
case 64:
vector_op_vix_mask<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VI/VX integer/float compare mask for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, Word vlmax, uint32_t vmask, bool scalar) {
// If vlmax > 0 this means we have a vslidedown instruction, vslideup does not require vlmax
bool slideDown = vlmax;
uint32_t scalarPos = slideDown ? vl - 1 : 0;
// If scalar set is set this means we have a v(f)slide1up or v(f)slide1down instruction,
// so first is our scalar value and we need to overwrite it with 1 for later computations
if (scalar && vl && !isMasked(vreg_file, 0, scalarPos, vmask)) {
DP(4, "Slide - Moving scalar value " << +first << " to position " << +scalarPos);
getVregData<DT>(vreg_file, rdest, scalarPos) = first;
}
first = scalar ? 1 : first;
for (Word i = slideDown ? 0 : first; i < vl - (scalar && vl && slideDown); i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
__uint128_t iSrc = slideDown ? (__uint128_t)i + (__uint128_t)first : (__uint128_t)i - (__uint128_t)first; // prevent overflows/underflows
DT value = (!slideDown || iSrc < vlmax) ? getVregData<DT>(vreg_file, rsrc0, iSrc) : 0;
DP(4, "Slide - Moving value " << +value << " from position " << (uint64_t)iSrc << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_slide(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask, bool scalar) {
switch (vsew) {
case 8:
vector_op_vix_slide<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
break;
case 16:
vector_op_vix_slide<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
break;
case 32:
vector_op_vix_slide<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
break;
case 64:
vector_op_vix_slide<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
break;
default:
std::cout << "Failed to execute VI/VX slide for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vl, Word vlmax, uint32_t vmask) {
for (Word i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT value = first < vlmax ? getVregData<DT>(vreg_file, rsrc0, first) : 0;
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_gather(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_gather<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
break;
case 16:
vector_op_vix_gather<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
break;
case 32:
vector_op_vix_gather<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
break;
case 64:
vector_op_vix_gather<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
break;
default:
std::cout << "Failed to execute VI/VX register gather for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DT third = getVregData<DT>(vreg_file, rdest, i);
DT result = OP<DT, DT>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 64:
vector_op_vv<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl) {
for (uint32_t i = 0; i < vl; i++) {
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool third = !isMasked(vreg_file, 0, i, false);
DT result = OP<DT, DT>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
switch (vsew) {
case 8:
vector_op_vv_carry<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 16:
vector_op_vv_carry<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 32:
vector_op_vv_carry<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 64:
vector_op_vv_carry<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
default:
std::cout << "Failed to execute VV carry for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool third = !vmask && !isMasked(vreg_file, 0, i, vmask);
bool result = OP<DT, DTR>::apply(first, second, third);
DP(4, (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
getVregData<uint8_t>(vreg_file, rdest, i / 8) &= ~(1 << (i % 8));
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_carry_out<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_carry_out<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_carry_out<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 64:
vector_op_vv_carry_out<OP, DT64, DT128>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV carry out for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
uint32_t rsrc = isMasked(vreg_file, 0, i, vmask) ? rsrc1 : rsrc0;
DT result = getVregData<DT>(vreg_file, rsrc, i);
DP(4, "Merge - Choosing result: " << +result);
getVregData<DT>(vreg_file, rdest, i) = result;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_merge<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_merge<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_merge<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 64:
vector_op_vv_merge<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, bool ei16, uint32_t vlmax, uint32_t vmask) {
for (Word i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
uint32_t first = ei16 ? getVregData<uint16_t>(vreg_file, rsrc0, i) : getVregData<DT>(vreg_file, rsrc0, i);
DT value = first < vlmax ? getVregData<DT>(vreg_file, rsrc1, first) : 0;
DP(4, "Register gather - Moving value " << +value << " from position " << +first << " to position " << +i);
getVregData<DT>(vreg_file, rdest, i) = value;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, bool ei16, uint32_t vlmax, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_gather<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
break;
case 16:
vector_op_vv_gather<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
break;
case 32:
vector_op_vv_gather<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
break;
case 64:
vector_op_vv_gather<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
break;
default:
std::cout << "Failed to execute VV register gather for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DT, DTR>::apply(first, second, third);
DP(4, "Widening " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV widening for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DTR, DTR>::apply(first, second, third);
DP(4, "Widening wv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_wv<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_wv<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_wv<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV widening wv for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_wfv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DTR second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR third = getVregData<DTR>(vreg_file, rdest, i);
DTR result = OP<DTR, DTR>::apply(rv_ftod(first), second, third);
DP(4, "Widening wfv " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ", " << +third << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_wfv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 32) {
vector_op_vv_wfv<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
std::cout << "Failed to execute VV widening wfv for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DTR first = getVregData<DTR>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(4, "Narrowing " << (OP<DT, DTR>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vv_n<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vv_n<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vv_n<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VV narrowing for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DTR>(vreg_file, rsrc0, i);
DT second = getVregData<DTR>(vreg_file, rsrc1, i);
DTR result = OP<DT, DTR>::apply(first, second, vxrm, vxsat);
DP(4, "Saturating " << (OP<DT, DTR>::name()) << "(" << +(DTR)first << ", " << +(DTR)second << ")" << " = " << +(DTR)result);
getVregData<DTR>(vreg_file, rdest, i) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vv_sat<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vv_sat<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vv_sat<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 64:
vector_op_vv_sat<OP, DT128, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VV saturating for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_scale(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
switch (vsew) {
case 8:
vector_op_vv_sat<OP, DT8, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 16:
vector_op_vv_sat<OP, DT16, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 32:
vector_op_vv_sat<OP, DT32, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
case 64:
vector_op_vv_sat<OP, DT64, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
break;
default:
std::cout << "Failed to execute VV scale for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
// use rdest as accumulator
if (i == 0) {
getVregData<DT>(vreg_file, rdest, 0) = getVregData<DT>(vreg_file, rsrc0, 0);
}
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rdest, 0);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DT result = OP<DT, DT>::apply(first, second, 0);
DP(4, "Reduction " << (OP<DT, DT>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
getVregData<DT>(vreg_file, rdest, 0) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_red<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_red<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_red<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 64:
vector_op_vv_red<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV reduction for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
// use rdest as accumulator
if (i == 0) {
getVregData<DTR>(vreg_file, rdest, 0) = getVregData<DTR>(vreg_file, rsrc0, 0);
}
if (isMasked(vreg_file, 0, i, vmask))
continue;
DTR first = getVregData<DTR>(vreg_file, rdest, 0);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR second_w = std::is_signed<DT>() ? sext((DTR)second, sizeof(DT) * 8) : zext((DTR)second, sizeof(DT) * 8);
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
DP(4, "Widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, 0) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_red_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_red_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_red_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV widening reduction for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT, typename DTR>
void vector_op_vv_red_wf(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
// use rdest as accumulator
if (i == 0) {
getVregData<DTR>(vreg_file, rdest, 0) = getVregData<DTR>(vreg_file, rsrc0, 0);
}
if (isMasked(vreg_file, 0, i, vmask))
continue;
DTR first = getVregData<DTR>(vreg_file, rdest, 0);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
DTR second_w = rv_ftod(second);
DTR result = OP<DTR, DTR>::apply(first, second_w, 0);
DP(4, "Float widening reduction " << (OP<DTR, DTR>::name()) << "(" << +first << ", " << +second_w << ")" << " = " << +result);
getVregData<DTR>(vreg_file, rdest, 0) = result;
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_red_wf(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 32) {
vector_op_vv_red_wf<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
std::cout << "Failed to execute VV float widening reduction for vsew: " << vsew << std::endl;
std::abort();
}
}
template <typename DT>
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DP(4, "Element Index = " << +i);
getVregData<DT>(vreg_file, rdest, i) = i;
}
}
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vid<uint8_t>(vreg_file, rdest, vl, vmask);
break;
case 16:
vector_op_vid<uint16_t>(vreg_file, rdest, vl, vmask);
break;
case 32:
vector_op_vid<uint32_t>(vreg_file, rdest, vl, vmask);
break;
case 64:
vector_op_vid<uint64_t>(vreg_file, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute vector element index for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP, typename DT>
void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl, uint32_t vmask) {
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
DT first = getVregData<DT>(vreg_file, rsrc0, i);
DT second = getVregData<DT>(vreg_file, rsrc1, i);
bool result = OP<DT, bool>::apply(first, second, 0);
DP(4, "Integer/float compare mask " << (OP<DT, bool>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
getVregData<uint8_t>(vreg_file, rdest, i / 8) &= ~(1 << (i % 8));
}
}
}
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_mask<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 16:
vector_op_vv_mask<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 32:
vector_op_vv_mask<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
case 64:
vector_op_vv_mask<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
break;
default:
std::cout << "Failed to execute VV integer/float compare mask for vsew: " << vsew << std::endl;
std::abort();
}
}
template <template <typename DT1, typename DT2> class OP>
void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl) {
for (uint32_t i = 0; i < vl; i++) {
uint8_t firstMask = getVregData<uint8_t>(vreg_file, rsrc0, i / 8);
bool first = (firstMask >> (i % 8)) & 0x1;
uint8_t secondMask = getVregData<uint8_t>(vreg_file, rsrc1, i / 8);
bool second = (secondMask >> (i % 8)) & 0x1;
bool result = OP<uint8_t, uint8_t>::apply(first, second, 0) & 0x1;
DP(4, "Compare mask bits " << (OP<uint8_t, uint8_t>::name()) << "(" << +first << ", " << +second << ")" << " = " << +result);
if (result) {
getVregData<uint8_t>(vreg_file, rdest, i / 8) |= 1 << (i % 8);
} else {
getVregData<uint8_t>(vreg_file, rdest, i / 8) &= ~(1 << (i % 8));
}
}
}
template <typename DT>
void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vl) {
int currPos = 0;
for (uint32_t i = 0; i < vl; i++) {
// Special case: use rsrc0 as mask vector register instead of default v0
// This instruction is always masked (vmask == 0), but encoded as unmasked (vmask == 1)
if (isMasked(vreg_file, rsrc0, i, 0))
continue;
DT value = getVregData<DT>(vreg_file, rsrc1, i);
DP(4, "Compression - Moving value " << +value << " from position " << i << " to position " << currPos);
getVregData<DT>(vreg_file, rdest, currPos) = value;
currPos++;
}
}
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
switch (vsew) {
case 8:
vector_op_vv_compress<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 16:
vector_op_vv_compress<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 32:
vector_op_vv_compress<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
case 64:
vector_op_vv_compress<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
break;
default:
std::cout << "Failed to execute VV compression for vsew: " << vsew << std::endl;
std::abort();
}
}
#endif