mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
minor fixes
This commit is contained in:
parent
896c59306c
commit
115ff2b599
5 changed files with 308 additions and 177 deletions
|
@ -363,8 +363,8 @@ scope()
|
|||
{
|
||||
echo "begin scope tests..."
|
||||
|
||||
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
|
||||
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
|
||||
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
|
||||
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
|
||||
|
||||
echo "debugging scope done!"
|
||||
}
|
||||
|
@ -385,7 +385,7 @@ synthesis()
|
|||
echo "begin synthesis tests..."
|
||||
|
||||
PREFIX=build_base make -C hw/syn/yosys clean
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
|
||||
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis
|
||||
|
||||
echo "synthesis tests done!"
|
||||
}
|
||||
|
|
3
perf/cache/cache_perf.log
vendored
3
perf/cache/cache_perf.log
vendored
|
@ -1,3 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim
|
||||
verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so
|
24
perf/cache/run.sh
vendored
24
perf/cache/run.sh
vendored
|
@ -10,17 +10,17 @@ sgemm()
|
|||
{
|
||||
echo "begin cache tests"
|
||||
|
||||
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > cache_perf.log
|
||||
echo -e "\n**************************************\n" >> cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
|
||||
echo -e "\n**************************************\n" >> cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
|
||||
echo -e "\n**************************************\n" >> cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
|
||||
echo -e "\n**************************************\n" >> cache_perf.log
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
|
||||
echo -e "\n**************************************\n" >> cache_perf.log
|
||||
CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
|
||||
|
||||
echo "cache tests done!"
|
||||
}
|
||||
|
@ -36,6 +36,6 @@ case $1 in
|
|||
-h | --help ) usage
|
||||
;;
|
||||
* ) sgemm
|
||||
;;
|
||||
;;
|
||||
esac
|
||||
shift
|
|
@ -148,10 +148,9 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
|
|||
59, 58, 57, 56, 56, 55, 54, 53};
|
||||
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0) {
|
||||
exp--;
|
||||
sig <<= 1;
|
||||
}
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
|
||||
sig = (sig << 1) & make_mask64(0, s);
|
||||
}
|
||||
|
||||
|
@ -358,9 +357,9 @@ float16_t f16_recip7(float16_t in) {
|
|||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
if (round_abnormal) {
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -401,9 +400,9 @@ float32_t f32_recip7(float32_t in) {
|
|||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
if (round_abnormal) {
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -444,9 +443,9 @@ float64_t f64_recip7(float64_t in) {
|
|||
[[fallthrough]];
|
||||
default: // +- normal
|
||||
uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
|
||||
if (round_abnormal)
|
||||
softfloat_exceptionFlags |=
|
||||
softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
if (round_abnormal) {
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ template <typename T, typename R>
|
|||
class Madc {
|
||||
public:
|
||||
static R apply(T first, T second, R third) {
|
||||
return (R)first + (R)second + third > (R)std::numeric_limits<T>::max();
|
||||
return ((R)first + (R)second + third) > (R)std::numeric_limits<T>::max();
|
||||
}
|
||||
static std::string name() { return "Madc"; }
|
||||
};
|
||||
|
@ -62,7 +62,7 @@ template <typename T, typename R>
|
|||
class Msbc {
|
||||
public:
|
||||
static R apply(T first, T second, R third) {
|
||||
return (R)second < (R)first + third;
|
||||
return (R)second < ((R)first + third);
|
||||
}
|
||||
static std::string name() { return "Msbc"; }
|
||||
};
|
||||
|
@ -1128,6 +1128,8 @@ public:
|
|||
static std::string name() { return "Smul"; }
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) {
|
||||
auto &mask = vreg_file.at(maskVreg);
|
||||
uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
|
||||
|
@ -1155,7 +1157,7 @@ DT &getVregData(std::vector<std::vector<vortex::Byte>> &vreg_file, uint32_t base
|
|||
}
|
||||
|
||||
template <typename DT>
|
||||
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
uint32_t vsew = sizeof(DT) * 8;
|
||||
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
|
||||
if (nfields * emul > 8) {
|
||||
|
@ -1177,7 +1179,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
}
|
||||
}
|
||||
|
||||
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_load<uint8_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
|
||||
|
@ -1198,7 +1200,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
}
|
||||
|
||||
template <typename DT>
|
||||
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
uint32_t vsew = sizeof(DT) * 8;
|
||||
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
|
||||
if (nfields * emul > 8) {
|
||||
|
@ -1238,7 +1240,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
|
|||
}
|
||||
}
|
||||
|
||||
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_load<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
|
||||
|
@ -1259,7 +1261,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
|
|||
}
|
||||
|
||||
template <typename DT>
|
||||
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
uint32_t vsew = sizeof(DT) * 8;
|
||||
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
|
||||
for (uint32_t i = 0; i < vl * nfields; i++) {
|
||||
|
@ -1274,7 +1276,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
|
|||
}
|
||||
}
|
||||
|
||||
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_store<uint8_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
|
||||
|
@ -1295,7 +1297,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
|
|||
}
|
||||
|
||||
template <typename DT>
|
||||
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
uint32_t vsew = sizeof(DT) * 8;
|
||||
uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
|
||||
for (uint32_t i = 0; i < vl * nfields; i++) {
|
||||
|
@ -1328,7 +1330,7 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
|
|||
}
|
||||
}
|
||||
|
||||
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_store<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
|
||||
|
@ -1364,15 +1366,20 @@ void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1391,15 +1398,20 @@ void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_carry<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_carry<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_carry<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_carry<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX carry for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1422,15 +1434,20 @@ void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
|
||||
void vector_op_vix_carry_out(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_carry_out<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_carry_out<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_carry_out<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_carry_out<OP, DT64, DT128>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX carry out for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1447,15 +1464,20 @@ void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, ui
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_merge(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_merge<DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_merge<DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_merge<DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_merge<DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1467,15 +1489,20 @@ void vector_op_scalar(DT &dest, std::vector<std::vector<Byte>> &vreg_file, uint3
|
|||
std::cout << "Vwxunary0/Vwfunary0 has unsupported value for vs2: " << rsrc0 << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
dest = getVregData<uint8_t>(vreg_file, rsrc1, 0);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
dest = getVregData<uint16_t>(vreg_file, rsrc1, 0);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
dest = getVregData<uint32_t>(vreg_file, rsrc1, 0);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
dest = getVregData<uint64_t>(vreg_file, rsrc1, 0);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute vmv.x.s/vfmv.f.s for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1497,13 +1524,17 @@ void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_w<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_w<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_w<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX widening for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1511,13 +1542,17 @@ void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint3
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_wx(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX widening wx for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1538,13 +1573,17 @@ void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_n(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_n<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_n<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_n<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX narrowing for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1565,15 +1604,20 @@ void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uin
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
|
||||
void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_sat<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_sat<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_sat<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_sat<OP, DT128, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX saturating for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1581,15 +1625,20 @@ void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uin
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_scale(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_sat<OP, DT8, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_sat<OP, DT16, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_sat<OP, DT32, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_sat<OP, DT64, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX scale for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1676,15 +1725,20 @@ void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uin
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_mask(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_mask<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_mask<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_mask<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_mask<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX integer/float compare mask for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1716,15 +1770,20 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_slide(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask, bool scalar) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_slide<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_slide<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_slide<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_slide<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX slide for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1744,15 +1803,20 @@ void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file,
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vix_gather(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vix_gather<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vix_gather<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vix_gather<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vix_gather<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VI/VX register gather for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1775,15 +1839,20 @@ void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uin
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1803,15 +1872,20 @@ void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_carry<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_carry<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_carry<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_carry<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV carry for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1835,15 +1909,20 @@ void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
|
||||
void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_carry_out<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_carry_out<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_carry_out<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_carry_out<OP, DT64, DT128>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV carry out for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1861,15 +1940,20 @@ void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_merge<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_merge<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_merge<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_merge<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1890,15 +1974,20 @@ void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, bool ei16, uint32_t vlmax, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_gather<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_gather<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_gather<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_gather<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV register gather for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1921,13 +2010,17 @@ void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV widening for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1950,13 +2043,17 @@ void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_wv<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_wv<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_wv<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV widening wv for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2003,13 +2100,17 @@ void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_n<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_n<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_n<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV narrowing for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2031,15 +2132,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
|
||||
void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_sat<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_sat<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_sat<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_sat<OP, DT128, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV saturating for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2047,15 +2153,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_scale(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_sat<OP, DT8, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_sat<OP, DT16, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_sat<OP, DT32, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_sat<OP, DT64, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV scale for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2081,15 +2192,20 @@ void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_red<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_red<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_red<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_red<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV reduction for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2116,13 +2232,17 @@ void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_red_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_red_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_red_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV widening reduction for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2169,15 +2289,20 @@ void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, ui
|
|||
}
|
||||
|
||||
void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vid<uint8_t>(vreg_file, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vid<uint16_t>(vreg_file, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vid<uint32_t>(vreg_file, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vid<uint64_t>(vreg_file, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute vector element index for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2203,15 +2328,20 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0
|
|||
|
||||
template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_mask<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_mask<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_mask<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_mask<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV integer/float compare mask for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2252,15 +2382,20 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r
|
|||
|
||||
template <typename DT8, typename DT16, typename DT32, typename DT64>
|
||||
void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
|
||||
if (vsew == 8) {
|
||||
switch (vsew) {
|
||||
case 8:
|
||||
vector_op_vv_compress<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 16) {
|
||||
break;
|
||||
case 16:
|
||||
vector_op_vv_compress<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 32) {
|
||||
break;
|
||||
case 32:
|
||||
vector_op_vv_compress<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else if (vsew == 64) {
|
||||
break;
|
||||
case 64:
|
||||
vector_op_vv_compress<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
std::cout << "Failed to execute VV compression for vsew: " << vsew << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
@ -2303,7 +2438,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
|
|||
std::abort();
|
||||
}
|
||||
DP(4, "Whole vector register load with nreg: " << nreg);
|
||||
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
|
||||
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
|
||||
uint32_t vl = nreg * VLEN / vsew_bits;
|
||||
WordI stride = instr.getVsew();
|
||||
vector_op_vix_load(warp.vreg_file, this, rsdata[0][0].i, rdest, vsew_bits, vl, false, stride, 1, 0, vmask);
|
||||
|
@ -2356,7 +2491,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
|
|||
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
|
||||
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
|
||||
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
|
||||
vector_op_vv_load(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), rdest, warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
|
@ -2438,7 +2573,7 @@ void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_dat
|
|||
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
|
||||
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
|
||||
uint32_t nfields = instr.getVnf() + 1;
|
||||
uint32_t vsew_bits = 1 << (3 * instr.getVsew());
|
||||
uint32_t vsew_bits = 1 << (3 + instr.getVsew());
|
||||
vector_op_vv_store(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue