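minor fixes: lower CI scope depth to 128, build yosys synthesis with NUM_WARPS=2/NUM_THREADS=2, remove checked-in perf/cache/cache_perf.log and write it to the working directory, reformat softfloat_ext.cpp, and in execute_v.cpp parenthesize Madc/Msbc, take vector load/store base_addr as WordI, and dispatch on vsew with switch statements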

2025-06-27 08:50:02 -04:00 · 2024-12-05 22:38:04 -08:00 · 2024-12-05 22:38:04 -08:00 · 115ff2b599
commit 115ff2b599
parent 896c59306c
5 changed files with 308 additions and 177 deletions
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@ -363,8 +363,8 @@ scope()
 {
    echo "begin scope tests..."

-    SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
-    SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
+    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
+    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope

    echo "debugging scope done!"
 }
@ -385,7 +385,7 @@ synthesis()
    echo "begin synthesis tests..."

    PREFIX=build_base make -C hw/syn/yosys clean
-    PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
+    PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis

    echo "synthesis tests done!"
 }
--- a/perf/cache/cache_perf.log
+++ b/perf/cache/cache_perf.log
@ -1,3 +0,0 @@
-CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2   -DPERF_ENABLE -DICACHE_NUM_WAYS=1
-running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim
-verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2   -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2   -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so
--- a/perf/cache/run.sh
+++ b/perf/cache/run.sh
@ -10,17 +10,17 @@ sgemm()
 {
 echo "begin cache tests"

-CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > ./perf/cache/cache_perf.log
-echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
-CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
-echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log 
-CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
-echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
-CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
-echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
-CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
-echo -e "\n**************************************\n" >> ./perf/cache/cache_perf.log
-CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> ./perf/cache/cache_perf.log
+CONFIGS="-DICACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' > cache_perf.log
+echo -e "\n**************************************\n" >> cache_perf.log
+CONFIGS="-DDCACHE_NUM_WAYS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
+echo -e "\n**************************************\n" >> cache_perf.log
+CONFIGS="-DICACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
+echo -e "\n**************************************\n" >> cache_perf.log
+CONFIGS="-DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
+echo -e "\n**************************************\n" >> cache_perf.log
+CONFIGS="-DICACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log
+echo -e "\n**************************************\n" >> cache_perf.log
+CONFIGS="-DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n64" --perf=1 | grep 'PERF' >> cache_perf.log

 echo "cache tests done!"
 }
@ -36,6 +36,6 @@ case $1 in
    -h | --help ) usage
                    ;;
    * ) sgemm
-        ;;             
+        ;;
 esac
 shift
--- a/sim/common/softfloat_ext.cpp
+++ b/sim/common/softfloat_ext.cpp
@ -148,10 +148,9 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
      59,  58,  57,  56,  56,  55,  54,  53};

  if (sub) {
-    while (extract64(sig, s - 1, 1) == 0) {
-      exp--;
-      sig <<= 1;
-    }
+    while (extract64(sig, s - 1, 1) == 0)
+      exp--, sig <<= 1;
+      
    sig = (sig << 1) & make_mask64(0, s);
  }

@ -358,9 +357,9 @@ float16_t f16_recip7(float16_t in) {
    [[fallthrough]];
  default: // +- normal
    uA.ui = recip7(uA.ui, 5, 10, softfloat_roundingMode, sub, &round_abnormal);
-    if (round_abnormal)
-      softfloat_exceptionFlags |=
-          softfloat_flag_inexact | softfloat_flag_overflow;
+    if (round_abnormal) {
+      softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
+    }
    break;
  }

@ -401,9 +400,9 @@ float32_t f32_recip7(float32_t in) {
    [[fallthrough]];
  default: // +- normal
    uA.ui = recip7(uA.ui, 8, 23, softfloat_roundingMode, sub, &round_abnormal);
-    if (round_abnormal)
-      softfloat_exceptionFlags |=
-          softfloat_flag_inexact | softfloat_flag_overflow;
+    if (round_abnormal) {
+      softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
+    }
    break;
  }

@ -444,9 +443,9 @@ float64_t f64_recip7(float64_t in) {
    [[fallthrough]];
  default: // +- normal
    uA.ui = recip7(uA.ui, 11, 52, softfloat_roundingMode, sub, &round_abnormal);
-    if (round_abnormal)
-      softfloat_exceptionFlags |=
-          softfloat_flag_inexact | softfloat_flag_overflow;
+    if (round_abnormal) {
+      softfloat_exceptionFlags |= softfloat_flag_inexact | softfloat_flag_overflow;
+    }
    break;
  }

--- a/sim/simx/execute_v.cpp
+++ b/sim/simx/execute_v.cpp
@ -44,7 +44,7 @@ template <typename T, typename R>
 class Madc {
 public:
  static R apply(T first, T second, R third) {
-    return (R)first + (R)second + third > (R)std::numeric_limits<T>::max();
+    return ((R)first + (R)second + third) > (R)std::numeric_limits<T>::max();
  }
  static std::string name() { return "Madc"; }
 };
@ -62,7 +62,7 @@ template <typename T, typename R>
 class Msbc {
 public:
  static R apply(T first, T second, R third) {
-    return (R)second < (R)first + third;
+    return (R)second < ((R)first + third);
  }
  static std::string name() { return "Msbc"; }
 };
@ -1128,6 +1128,8 @@ public:
  static std::string name() { return "Smul"; }
 };

+///////////////////////////////////////////////////////////////////////////////
+
 bool isMasked(std::vector<std::vector<Byte>> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) {
  auto &mask = vreg_file.at(maskVreg);
  uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8);
@ -1155,7 +1157,7 @@ DT &getVregData(std::vector<std::vector<vortex::Byte>> &vreg_file, uint32_t base
 }

 template <typename DT>
-void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  uint32_t vsew = sizeof(DT) * 8;
  uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
  if (nfields * emul > 8) {
@ -1177,7 +1179,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
  }
 }

-void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  switch (vsew) {
  case 8:
    vector_op_vix_load<uint8_t>(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask);
@ -1198,7 +1200,7 @@ void vector_op_vix_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
 }

 template <typename DT>
-void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  uint32_t vsew = sizeof(DT) * 8;
  uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
  if (nfields * emul > 8) {
@ -1238,7 +1240,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
  }
 }

-void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  switch (vsew) {
  case 8:
    vector_op_vv_load<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask);
@ -1259,7 +1261,7 @@ void vector_op_vv_load(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulat
 }

 template <typename DT>
-void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  uint32_t vsew = sizeof(DT) * 8;
  uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
  for (uint32_t i = 0; i < vl * nfields; i++) {
@ -1274,7 +1276,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
  }
 }

-void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  switch (vsew) {
  case 8:
    vector_op_vix_store<uint8_t>(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask);
@ -1295,7 +1297,7 @@ void vector_op_vix_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emul
 }

 template <typename DT>
-void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  uint32_t vsew = sizeof(DT) * 8;
  uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11);
  for (uint32_t i = 0; i < vl * nfields; i++) {
@ -1328,7 +1330,7 @@ void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emula
  }
 }

-void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, Word base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
+void vector_op_vv_store(std::vector<std::vector<Byte>> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) {
  switch (vsew) {
  case 8:
    vector_op_vv_store<uint8_t>(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask);
@ -1364,15 +1366,20 @@ void vector_op_vix(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32_t

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1391,15 +1398,20 @@ void vector_op_vix_carry(DT first, std::vector<std::vector<Byte>> &vreg_file, ui

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_carry(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_carry<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_carry<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_carry<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_carry<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX carry for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1422,15 +1434,20 @@ void vector_op_vix_carry_out(DT first, std::vector<std::vector<Byte>> &vreg_file

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
 void vector_op_vix_carry_out(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_carry_out<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_carry_out<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_carry_out<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_carry_out<OP, DT64, DT128>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX carry out for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1447,15 +1464,20 @@ void vector_op_vix_merge(DT first, std::vector<std::vector<Byte>> &vreg_file, ui

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_merge(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_merge<DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_merge<DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_merge<DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_merge<DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1467,15 +1489,20 @@ void vector_op_scalar(DT &dest, std::vector<std::vector<Byte>> &vreg_file, uint3
    std::cout << "Vwxunary0/Vwfunary0 has unsupported value for vs2: " << rsrc0 << std::endl;
    std::abort();
  }
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    dest = getVregData<uint8_t>(vreg_file, rsrc1, 0);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    dest = getVregData<uint16_t>(vreg_file, rsrc1, 0);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    dest = getVregData<uint32_t>(vreg_file, rsrc1, 0);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    dest = getVregData<uint64_t>(vreg_file, rsrc1, 0);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute vmv.x.s/vfmv.f.s for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1497,13 +1524,17 @@ void vector_op_vix_w(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_w<OP, DT8, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_w<OP, DT16, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_w<OP, DT32, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX widening for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1511,13 +1542,17 @@ void vector_op_vix_w(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint3

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_wx(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX widening wx for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1538,13 +1573,17 @@ void vector_op_vix_n(DT first, std::vector<std::vector<Byte>> &vreg_file, uint32

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_n(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_n<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_n<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_n<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX narrowing for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1565,15 +1604,20 @@ void vector_op_vix_sat(DTR first, std::vector<std::vector<Byte>> &vreg_file, uin

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
 void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_sat<OP, DT16, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_sat<OP, DT32, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_sat<OP, DT64, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_sat<OP, DT128, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX saturating for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1581,15 +1625,20 @@ void vector_op_vix_sat(Word src1, std::vector<std::vector<Byte>> &vreg_file, uin

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_scale(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_sat<OP, DT8, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_sat<OP, DT16, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_sat<OP, DT32, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_sat<OP, DT64, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX scale for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1676,15 +1725,20 @@ void vector_op_vix_mask(DT first, std::vector<std::vector<Byte>> &vreg_file, uin

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_mask(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_mask<OP, DT8>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_mask<OP, DT16>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_mask<OP, DT32>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_mask<OP, DT64>(src1, vreg_file, rsrc0, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX integer/float compare mask for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1716,15 +1770,20 @@ void vector_op_vix_slide(Word first, std::vector<std::vector<Byte>> &vreg_file,

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_slide(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask, bool scalar) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_slide<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_slide<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_slide<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_slide<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask, scalar);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX slide for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1744,15 +1803,20 @@ void vector_op_vix_gather(Word first, std::vector<std::vector<Byte>> &vreg_file,

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vix_gather(Word src1, std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rdest, uint32_t vsew, uint32_t vl, Word vlmax, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vix_gather<DT8>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vix_gather<DT16>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vix_gather<DT32>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vix_gather<DT64>(src1, vreg_file, rsrc0, rdest, vl, vlmax, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VI/VX register gather for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1775,15 +1839,20 @@ void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uin

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1803,15 +1872,20 @@ void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_carry(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_carry<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_carry<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_carry<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_carry<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV carry for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1835,15 +1909,20 @@ void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
 void vector_op_vv_carry_out(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_carry_out<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_carry_out<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_carry_out<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_carry_out<OP, DT64, DT128>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV carry out for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1861,15 +1940,20 @@ void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_merge(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_merge<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_merge<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_merge<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_merge<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1890,15 +1974,20 @@ void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsr

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_gather(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, bool ei16, uint32_t vlmax, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_gather<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_gather<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_gather<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_gather<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, ei16, vlmax, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV register gather for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1921,13 +2010,17 @@ void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV widening for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -1950,13 +2043,17 @@ void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_wv(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_wv<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_wv<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_wv<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV widening wv for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2003,13 +2100,17 @@ void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, u

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_n(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_n<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_n<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_n<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV narrowing for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2031,15 +2132,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64, typename DT128>
 void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_sat<OP, DT16, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_sat<OP, DT32, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_sat<OP, DT64, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_sat<OP, DT128, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV saturating for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2047,15 +2153,20 @@ void vector_op_vv_sat(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_scale(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask, uint32_t vxrm, uint32_t &vxsat) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_sat<OP, DT8, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_sat<OP, DT16, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_sat<OP, DT32, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_sat<OP, DT64, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask, vxrm, vxsat);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV scale for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2081,15 +2192,20 @@ void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0,

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_red(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_red<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_red<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_red<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_red<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV reduction for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2116,13 +2232,17 @@ void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_red_w(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_red_w<OP, DT8, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_red_w<OP, DT16, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_red_w<OP, DT32, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV widening reduction for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2169,15 +2289,20 @@ void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, ui
 }

 void vector_op_vid(std::vector<std::vector<Byte>> &vreg_file, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vid<uint8_t>(vreg_file, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vid<uint16_t>(vreg_file, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vid<uint32_t>(vreg_file, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vid<uint64_t>(vreg_file, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute vector element index for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2203,15 +2328,20 @@ void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0

 template <template <typename DT1, typename DT2> class OP, typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_mask(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl, uint32_t vmask) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_mask<OP, DT8>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_mask<OP, DT16>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_mask<OP, DT32>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_mask<OP, DT64>(vreg_file, rsrc0, rsrc1, rdest, vl, vmask);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV integer/float compare mask for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2252,15 +2382,20 @@ void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t r

 template <typename DT8, typename DT16, typename DT32, typename DT64>
 void vector_op_vv_compress(std::vector<std::vector<Byte>> &vreg_file, uint32_t rsrc0, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t vl) {
-  if (vsew == 8) {
+  switch (vsew) {
+  case 8:
    vector_op_vv_compress<DT8>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 16) {
+    break;
+  case 16:
    vector_op_vv_compress<DT16>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 32) {
+    break;
+  case 32:
    vector_op_vv_compress<DT32>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else if (vsew == 64) {
+    break;
+  case 64:
    vector_op_vv_compress<DT64>(vreg_file, rsrc0, rsrc1, rdest, vl);
-  } else {
+    break;
+  default:
    std::cout << "Failed to execute VV compression for vsew: " << vsew << std::endl;
    std::abort();
  }
@ -2303,7 +2438,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
        std::abort();
      }
      DP(4, "Whole vector register load with nreg: " << nreg);
-      uint32_t vsew_bits = 1 << (3 * instr.getVsew());
+      uint32_t vsew_bits = 1 << (3 + instr.getVsew());
      uint32_t vl = nreg * VLEN / vsew_bits;
      WordI stride = instr.getVsew();
      vector_op_vix_load(warp.vreg_file, this, rsdata[0][0].i, rdest, vsew_bits, vl, false, stride, 1, 0, vmask);
@ -2356,7 +2491,7 @@ void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data
               // vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
               // vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
    uint32_t nfields = instr.getVnf() + 1;
-    uint32_t vsew_bits = 1 << (3 * instr.getVsew());
+    uint32_t vsew_bits = 1 << (3 + instr.getVsew());
    vector_op_vv_load(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), rdest, warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
    break;
  }
@ -2438,7 +2573,7 @@ void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector<reg_dat
               // vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
               // vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
    uint32_t nfields = instr.getVnf() + 1;
-    uint32_t vsew_bits = 1 << (3 * instr.getVsew());
+    uint32_t vsew_bits = 1 << (3 + instr.getVsew());
    vector_op_vv_store(warp.vreg_file, this, rsdata[0][0].i, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, vsew_bits, warp.vl, nfields, warp.vtype.vlmul, vmask);
    break;
  }