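minor fixes: lower CI scope depth to 128, shrink the yosys synthesis config, drop a stray build log, write cache_perf.log to the working directory, tidy softfloat_ext formatting, use signed base_addr in vector loads/stores, and switch vsew dispatch from if/else chains to switch statements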

This commit is contained in:
tinebp 2024-12-05 22:38:04 -08:00
parent 896c59306c
commit 115ff2b599
5 changed files with 308 additions and 177 deletions

View file

@ -363,8 +363,8 @@ scope()
{
echo "begin scope tests..."
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
echo "debugging scope done!"
}
@ -385,7 +385,7 @@ synthesis()
echo "begin synthesis tests..."
PREFIX=build_base make -C hw/syn/yosys clean
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis
echo "synthesis tests done!"
}

View file

@ -1,3 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim
verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so

24
perf/cache/run.sh vendored
View file

@ -10,17 +10,17 @@ sgemm()
{
echo "begin cache tests"
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > ./perf/cache/cache_perf.log
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > cache_perf.log
echo -e "\n**************************************\n" >> cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
echo -e "\n**************************************\n" >> cache_perf.log
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
echo -e "\n**************************************\n" >> cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
echo -e "\n**************************************\n" >> cache_perf.log
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
echo -e "\n**************************************\n" >> cache_perf.log
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
echo "cache tests done!"
}
@ -36,6 +36,6 @@ case $1 in
-h | --help ) usage
;;
* ) sgemm
;;
;;
esac
shift

View file

@ -148,10 +148,9 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
59, 58, 57, 56, 56, 55, 54, 53};
if (sub) {
while (extract64(sig, s - 1, 1) == 0) {
exp--;
sig <<= 1;
}
while (extract64(sig, s - 1, 1) == 0)
exp--, sig <<= 1;
sig = (sig << 1) & make_mask64(0, s);
}
@ -358,9 +357,9 @@ float16_t f16_recip7(float16_t in) {
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
if (round_abnormal) {
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
}
break;
}
@ -401,9 +400,9 @@ float32_t f32_recip7(float32_t in) {
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
if (round_abnormal) {
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
}
break;
}
@ -444,9 +443,9 @@ float64_t f64_recip7(float64_t in) {
[[fallthrough]];
default: // +- normal
uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
if (round_abnormal)
softfloat_exceptionFlags |=
softfloat_flag_inexact | softfloat_flag_overflow;
if (round_abnormal) {
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
}
break;
}

View file

@ -44,7 +44,7 @@ template <typename T, typename R>
// Madc: "does the three-operand sum exceed the range of T" predicate.
// T and R are the template parameters named on the enclosing template header;
// the sum is evaluated in R and the result is returned as R.
// NOTE(review): presumably the carry-out for an RVV vmadc-style op — confirm
// against the call sites that instantiate it.
class Madc {
public:
// Casts first and second to R, adds third, and compares the sum against
// std::numeric_limits<T>::max() (also cast to R). The boolean result of the
// comparison is converted to R on return.
// NOTE(review): this only detects overflow of T if R can hold the un-wrapped
// sum — verify the T/R pairs used by callers.
static R apply(T first, T second, R third) {
// Pre-commit spelling of the expression.
return (R)first + (R)second + third > (R)std::numeric_limits<T>::max();
// Post-commit spelling: the extra parentheses only make the existing
// precedence explicit ('+' already binds tighter than '>'), so the value
// computed is the same as the line above.
return ((R)first + (R)second + third) > (R)std::numeric_limits<T>::max();
}
// Returns the operation's name as a string.
static std::string name() { return "Madc"; }
};
@ -62,7 +62,7 @@ template <typename T, typename R>
// Msbc: "is second smaller than first + third" predicate.
// T and R are the template parameters named on the enclosing template header;
// the comparison is evaluated in R and the result is returned as R.
// NOTE(review): presumably the borrow-out for an RVV vmsbc-style op
// (second - first - third) — confirm against the call sites.
class Msbc {
public:
// Casts first to R, adds third, and tests whether second (cast to R) is
// strictly less than that sum. The boolean result is converted to R on return.
static R apply(T first, T second, R third) {
// Pre-commit spelling of the expression.
return (R)second < (R)first + third;
// Post-commit spelling: the extra parentheses only make the existing
// precedence explicit ('+' already binds tighter than '<'), so the value
// computed is the same as the line above.
return (R)second < ((R)first + third);
}
// Returns the operation's name as a string.
static std::string name() { return "Msbc"; }
};
@ -1128,6 +1128,8 @@ public:
static std::string name() { return "Smul"; }
};
///////////////////////////////////////////////////////////////////////////////
bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) {
auto &mask = vreg_file.at(maskVreg);
uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
@ -1155,7 +1157,7 @@ DT &getVregData(std::vector<std::vector<vortex::Byte>> &vreg_file, uint32_t base
}
template <typename DT>
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
if (nfields * emul > 8) {
@ -1177,7 +1179,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
}
}
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_load<uint8_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
@ -1198,7 +1200,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
}
template <typename DT>
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
if (nfields * emul > 8) {
@ -1238,7 +1240,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
}
}
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_load<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
@ -1259,7 +1261,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
}
template <typename DT>
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
for (uint32_t i = 0; i < vl * nfields; i++) {
@ -1274,7 +1276,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
}
}
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vix_store<uint8_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
@ -1295,7 +1297,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
}
template <typename DT>
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
uint32_t vsew = sizeof(DT) * 8;
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
for (uint32_t i = 0; i < vl * nfields; i++) {
@ -1328,7 +1330,7 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
}
}
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
switch (vsew) {
case 8:
vector_op_vv_store<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
@ -1364,15 +1366,20 @@ void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
std::abort();
}
@ -1391,15 +1398,20 @@ void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_carry<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_carry<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_carry<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_carry<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl);
} else {
break;
default:
std::cout << "Failed to execute VI/VX carry for vsew: " << vsew << std::endl;
std::abort();
}
@ -1422,15 +1434,20 @@ void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vix_carry_out(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_carry_out<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_carry_out<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_carry_out<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_carry_out<OP, DT64, DT128>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX carry out for vsew: " << vsew << std::endl;
std::abort();
}
@ -1447,15 +1464,20 @@ void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_merge(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_merge<DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_merge<DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_merge<DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_merge<DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
std::abort();
}
@ -1467,15 +1489,20 @@ void vector_op_scalar(DT &dest, std::vector<std::vector<Byte>> &vreg_file, uint3
std::cout << "Vwxunary0/Vwfunary0 has unsupported value for vs2: " << rsrc0 << std::endl;
std::abort();
}
if (vsew == 8) {
switch (vsew) {
case 8:
dest = getVregData<uint8_t>(vreg_file, rsrc1, 0);
} else if (vsew == 16) {
break;
case 16:
dest = getVregData<uint16_t>(vreg_file, rsrc1, 0);
} else if (vsew == 32) {
break;
case 32:
dest = getVregData<uint32_t>(vreg_file, rsrc1, 0);
} else if (vsew == 64) {
break;
case 64:
dest = getVregData<uint64_t>(vreg_file, rsrc1, 0);
} else {
break;
default:
std::cout << "Failed to execute vmv.x.s/vfmv.f.s for vsew: " << vsew << std::endl;
std::abort();
}
@ -1497,13 +1524,17 @@ void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_w<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_w<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_w<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX widening for vsew: " << vsew << std::endl;
std::abort();
}
@ -1511,13 +1542,17 @@ void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint3
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_wx(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX widening wx for vsew: " << vsew << std::endl;
std::abort();
}
@ -1538,13 +1573,17 @@ void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_n(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_n<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_n<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_n<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
std::cout << "Failed to execute VI/VX narrowing for vsew: " << vsew << std::endl;
std::abort();
}
@ -1565,15 +1604,20 @@ void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uin
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_sat<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_sat<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_sat<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_sat<OP, DT128, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
std::cout << "Failed to execute VI/VX saturating for vsew: " << vsew << std::endl;
std::abort();
}
@ -1581,15 +1625,20 @@ void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uin
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_scale(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_sat<OP, DT8, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_sat<OP, DT16, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_sat<OP, DT32, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_sat<OP, DT64, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
std::cout << "Failed to execute VI/VX scale for vsew: " << vsew << std::endl;
std::abort();
}
@ -1676,15 +1725,20 @@ void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uin
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_mask(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_mask<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_mask<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_mask<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_mask<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX integer/float compare mask for vsew: " << vsew << std::endl;
std::abort();
}
@ -1716,15 +1770,20 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_slide(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask, bool scalar) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_slide<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_slide<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_slide<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_slide<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
} else {
break;
default:
std::cout << "Failed to execute VI/VX slide for vsew: " << vsew << std::endl;
std::abort();
}
@ -1744,15 +1803,20 @@ void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file,
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vix_gather(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vix_gather<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vix_gather<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vix_gather<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vix_gather<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
} else {
break;
default:
std::cout << "Failed to execute VI/VX register gather for vsew: " << vsew << std::endl;
std::abort();
}
@ -1775,15 +1839,20 @@ void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uin
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
std::abort();
}
@ -1803,15 +1872,20 @@ void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_carry<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_carry<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_carry<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_carry<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else {
break;
default:
std::cout << "Failed to execute VV carry for vsew: " << vsew << std::endl;
std::abort();
}
@ -1835,15 +1909,20 @@ void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_carry_out<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_carry_out<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_carry_out<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_carry_out<OP, DT64, DT128>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV carry out for vsew: " << vsew << std::endl;
std::abort();
}
@ -1861,15 +1940,20 @@ void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_merge<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_merge<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_merge<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_merge<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
std::abort();
}
@ -1890,15 +1974,20 @@ void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, bool ei16, uint32_t vlmax, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_gather<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_gather<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_gather<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_gather<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV register gather for vsew: " << vsew << std::endl;
std::abort();
}
@ -1921,13 +2010,17 @@ void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV widening for vsew: " << vsew << std::endl;
std::abort();
}
@ -1950,13 +2043,17 @@ void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_wv<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_wv<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_wv<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
std::cout << "Failed to execute VV widening wv for vsew: " << vsew << std::endl;
std::abort();
}
@ -2003,13 +2100,17 @@ void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
// Width dispatcher for the VV narrowing operation OP.
// Selects an element-type pair from vsew and forwards all remaining arguments
// (including vxrm and the vxsat reference) to the typed overload
// vector_op_vv_n<OP, A, B>. Note the wider type comes first here:
//   vsew == 8  -> <DT16, DT8>
//   vsew == 16 -> <DT32, DT16>
//   vsew == 32 -> <DT64, DT32>
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_n<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_n<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_n<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV narrowing for vsew: " << vsew << std::endl;
std::abort();
}
@ -2031,15 +2132,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
// Width dispatcher for the VV saturating operation OP.
// Selects an element-type pair from vsew and forwards all remaining arguments
// (including vxrm and the vxsat reference) to the typed overload
// vector_op_vv_sat<OP, A, B>. The first type is the next-wider one, which is
// why a DT128 parameter is needed for the 64-bit case:
//   vsew == 8  -> <DT16,  DT8>
//   vsew == 16 -> <DT32,  DT16>
//   vsew == 32 -> <DT64,  DT32>
//   vsew == 64 -> <DT128, DT64>
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_sat<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_sat<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_sat<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_sat<OP, DT128, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV saturating for vsew: " << vsew << std::endl;
std::abort();
}
@ -2047,15 +2153,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
// Width dispatcher for the VV scale operation OP.
// Forwards to the typed vector_op_vv_sat<OP, A, B> overload (not a separate
// "scale" overload), passing the same element type for both type arguments:
//   vsew == 8  -> <DT8,  DT8>
//   vsew == 16 -> <DT16, DT16>
//   vsew == 32 -> <DT32, DT32>
//   vsew == 64 -> <DT64, DT64>
// vxrm and the vxsat reference are passed through unchanged.
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_scale(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_sat<OP, DT8, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_sat<OP, DT16, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_sat<OP, DT32, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_sat<OP, DT64, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV scale for vsew: " << vsew << std::endl;
std::abort();
}
@ -2081,15 +2192,20 @@ void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
// Width dispatcher for the VV reduction operation OP.
// Selects a single element type from vsew and forwards all remaining arguments
// to the typed overload vector_op_vv_red<OP, T>:
//   vsew == 8  -> DT8
//   vsew == 16 -> DT16
//   vsew == 32 -> DT32
//   vsew == 64 -> DT64
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_red<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_red<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_red<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_red<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV reduction for vsew: " << vsew << std::endl;
std::abort();
}
@ -2116,13 +2232,17 @@ void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
// Width dispatcher for the VV widening reduction operation OP.
// Selects an element-type pair from vsew and forwards all remaining arguments
// to the typed overload vector_op_vv_red_w<OP, A, B>:
//   vsew == 8  -> <DT8,  DT16>
//   vsew == 16 -> <DT16, DT32>
//   vsew == 32 -> <DT32, DT64>
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_red_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_red_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_red_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV widening reduction for vsew: " << vsew << std::endl;
std::abort();
}
@ -2169,15 +2289,20 @@ void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, ui
}
// Width dispatcher for the vector element index operation.
// Unlike the templated dispatchers around it, this one takes no type
// parameters: it maps vsew directly to a fixed unsigned integer type and
// forwards to the typed overload vector_op_vid<T>:
//   vsew == 8  -> uint8_t
//   vsew == 16 -> uint16_t
//   vsew == 32 -> uint32_t
//   vsew == 64 -> uint64_t
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vid<uint8_t>(vreg_file, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vid<uint16_t>(vreg_file, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vid<uint32_t>(vreg_file, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vid<uint64_t>(vreg_file, rdest, vl, vmask);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute vector element index for vsew: " << vsew << std::endl;
std::abort();
}
@ -2203,15 +2328,20 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
// Width dispatcher for the VV integer/float compare-mask operation OP.
// Selects a single element type from vsew and forwards all remaining arguments
// to the typed overload vector_op_vv_mask<OP, T>:
//   vsew == 8  -> DT8
//   vsew == 16 -> DT16
//   vsew == 32 -> DT32
//   vsew == 64 -> DT64
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_mask<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_mask<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_mask<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_mask<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV integer/float compare mask for vsew: " << vsew << std::endl;
std::abort();
}
@ -2252,15 +2382,20 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
// Width dispatcher for the VV compression operation.
// Takes no OP template and no vmask argument; it selects a single element type
// from vsew and forwards to the typed overload vector_op_vv_compress<T>:
//   vsew == 8  -> DT8
//   vsew == 16 -> DT16
//   vsew == 32 -> DT32
//   vsew == 64 -> DT64
// Any other vsew prints a diagnostic to std::cout and calls std::abort().
// NOTE(review): the lines below carry both an if/else-if form and a switch/case
// form of the same dispatch, interleaved — presumably the removed and added
// sides of a diff hunk with their markers stripped; confirm against the real file.
template <typename DT8, typename DT16, typename DT32, typename DT64>
void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
if (vsew == 8) {
switch (vsew) {
case 8:
vector_op_vv_compress<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 16) {
break;
case 16:
vector_op_vv_compress<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 32) {
break;
case 32:
vector_op_vv_compress<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else if (vsew == 64) {
break;
case 64:
vector_op_vv_compress<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
} else {
break;
default:
// Unsupported width: report it and terminate the process.
std::cout << "Failed to execute VV compression for vsew: " << vsew << std::endl;
std::abort();
}
@ -2303,7 +2438,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
std::abort();
}
DP(4, "Whole vector register load with nreg: " << nreg);
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
uint32_t vl = nreg * VLEN / vsew_bits;
WordI stride = instr.getVsew();
vector_op_vix_load(warp.vreg_file, this, rsdata[0][0].i, rdest, vsew_bits, vl, false, stride, 1, 0, vmask);
@ -2356,7 +2491,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
uint32_t nfields = instr.getVnf() + 1;
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
vector_op_vv_load(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), rdest, warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
break;
}
@ -2438,7 +2573,7 @@ void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_dat
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
uint32_t nfields = instr.getVnf() + 1;
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
vector_op_vv_store(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
break;
}