vortex/sim/simx/vec_unit.cpp
tinebp 2f2a2ed886
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (cupbop, 32) (push) Blocked by required conditions
CI / tests (cupbop, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (tensor, 32) (push) Blocked by required conditions
CI / tests (tensor, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
simx instruction decode refactoring
2025-06-15 14:24:53 -07:00

1821 lines
86 KiB
C++

#include "vec_unit.h"
#include "vec_ops.h"
#include "core.h"
using namespace vortex;
// Simulate clock cycles depending on instruction type and element width and #lanes
// VSET = 1 cycle
// Vector instructions take the same amount of time as ALU instructions.
// In general there should be less overall instructions (hence the SIMD vector speedup).
// But each vector instruction is bigger, and the number of lanes greatly affects execution speed.
// Whenever we change VL using imm/VSET, we need to keep track of the new VL and SEW.
// By default, VL is set to MAXVL.
// After determining VL, we use VL and #lanes in order to determine overall cycle time.
// For example, for a vector add with VL=4 and #lanes=2, we will probably take 2 cycles,
// since we can only operate on two elements of the vector each cycle (limited by #lanes).
// SEW (element width) likely affects the cycle time, we can probably observe
// ALU operation cycle time in relation to element width to determine this though.
// The RTL implementation has an unroll and accumulate stage.
// The unroll stage sends vector elements to the appropriate functional unit up to VL,
// limited by the # lanes available.
// The accumulate stage deals with combining the results from the functional units,
// into the destination vector register.
// Which exact pipeline stage does the VPU unroll the vector (decode or execute)?
// Which exact pipeline stage does the VPU accumulate results?
// How do vector loads and stores interact with the cache?
// How about loading and storing scalars in vector registers?
// How does striding affect loads and stores?
class VecUnit::Impl {
public:
// Builds the vector-unit implementation behind `simobject`.
//   simobject : owning VecUnit; its Inputs/Outputs ports are used by tick().
//   arch      : supplies the warp and thread counts used to size per-warp state.
//   core      : core whose data cache is accessed by load()/store().
// NOTE(review): num_lanes_ is initialised from arch.num_warps(); confirm that a
// warp count (rather than a thread/lane count) is what is intended here.
// NOTE(review): pending_reqs_ is constructed with num_warps() and then reset()
// immediately calls clear() on it; confirm clear() keeps the intended sizing.
Impl(VecUnit* simobject, const Arch& arch, Core* core)
  : simobject_(simobject),
    core_(core),
    vpu_states_(arch.num_warps(), arch.num_threads()),
    num_lanes_(arch.num_warps()),
    pending_reqs_(arch.num_warps())
{
  reset();
}
// Nothing to release explicitly; members clean up after themselves.
~Impl() = default;
// Drops all pending requests and replaces the performance counters with a
// freshly constructed PerfStats. Called from the constructor.
void reset() {
  pending_reqs_.clear();
  perf_stats_ = PerfStats();
}
// Advances the vector unit by one simulated cycle.
//
// For every issue slot, takes the trace at the head of that slot's input queue
// (if there is one), selects an extra latency from the trace's VpuOpType and
// pushes the trace to the same slot's output with a delay argument of
// `2 + delay`. At most one trace per slot is consumed per call.
//
// Latency added on top of the fixed 2:
//   VSET                      -> 0
//   ARITH, ARITH_R            -> 1
//   IMUL                      -> LATENCY_IMUL
//   IDIV                      -> XLEN
//   FNCP, FNCP_R              -> 2
//   FMA, FMA_R                -> LATENCY_FMA
//   FDIV / FSQRT / FCVT       -> LATENCY_FDIV / LATENCY_FSQRT / LATENCY_FCVT
// Any other op type aborts the simulation.
void tick() {
  for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) {
    auto& input = simobject_->Inputs.at(iw);
    // An idle slot must not starve the slots after it: skip it and keep
    // scanning instead of leaving the whole tick.
    if (input.empty())
      continue;
    auto trace = input.front();
    auto trace_data = std::dynamic_pointer_cast<ExeTraceData>(trace->data);
    auto vpu_op = trace_data->vpu_op;
    int delay = 0;
    switch (vpu_op) {
    case VpuOpType::VSET:
      break;
    case VpuOpType::ARITH:
    case VpuOpType::ARITH_R:
      delay = 1;
      break;
    case VpuOpType::IMUL:
      delay = LATENCY_IMUL;
      break;
    case VpuOpType::IDIV:
      delay = XLEN;
      break;
    case VpuOpType::FNCP:
    case VpuOpType::FNCP_R:
      delay = 2;
      break;
    case VpuOpType::FMA:
    case VpuOpType::FMA_R:
      delay = LATENCY_FMA;
      break;
    case VpuOpType::FDIV:
      delay = LATENCY_FDIV;
      break;
    case VpuOpType::FSQRT:
      delay = LATENCY_FSQRT;
      break;
    case VpuOpType::FCVT:
      delay = LATENCY_FCVT;
      break;
    default:
      std::abort();
    }
    simobject_->Outputs.at(iw).push(trace, 2 + delay);
    DT(3, simobject_->name() << ": op=" << vpu_op << ", " << *trace);
    input.pop();
  }
}
// Executes one vector load instruction for thread `tid` of warp `wid`.
//
// The addressing mode (`mop`), unit-stride sub-opcode (`umop`), field count (`nf`),
// mask bit (`vm`) and width are taken from the instruction's IntrVlsArgs. Each element
// is read with core_->dcache_read, stored in the destination register group with
// setVregData, and its {address, size} pair is appended to trace_data->mem_addrs[tid].
//
//   instr      : decoded instruction; must carry IntrVlsArgs (std::get throws otherwise).
//   wid, tid   : warp and thread indices selecting the vector state and register file.
//   rs1_data   : rs1_data[tid].i supplies the base address.
//   rs2_data   : rs2_data[tid].i supplies the byte stride (used by the strided mode only).
//   trace_data : receives vl, vnf and the list of accessed addresses.
//
// Aborts the process on an unsupported mop/lumop or a reserved whole-register count.
void load(const Instr &instr,
uint32_t wid,
uint32_t tid,
const std::vector<reg_data_t>& rs1_data,
const std::vector<reg_data_t>& rs2_data,
MemTraceData* trace_data) {
auto& states = vpu_states_.at(wid);
auto lsuArgs = std::get<IntrVlsArgs>(instr.getArgs());
uint32_t vmask = lsuArgs.vm;
uint32_t vd = instr.getDestReg().idx;
uint32_t mop = lsuArgs.mop;
// element size in bytes, derived from the current vtype.vsew
uint32_t vsewb = 1 << states.vtype.vsew;
// only checked when asserts are enabled; every mode below sizes its accesses with vsewb
assert(lsuArgs.width == states.vtype.vsew && "vsew and width must match!");
auto& vreg_file = states.vreg_file.at(tid);
uint64_t base_addr = rs1_data.at(tid).i;
// keeps only address bits [31:2]: the low two bits and everything above bit 31 are cleared
base_addr &= 0xFFFFFFFC; // TODO: riscv-tests fix
// update trace data (the whole-register and mask cases below overwrite these)
trace_data->vl = states.vl;
trace_data->vnf = lsuArgs.nf + 1;
switch (mop) {
case 0b00: { // unit-stride
auto lumop = lsuArgs.umop;
switch (lumop) {
case 0b00000: // vle8.v, vle16.v, vle32.v, vle64.v
// vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v
// vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v
// vlseg4e8.v, vlseg4e16.v, vlseg4e32.v, vlseg4e64.v
// vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v
// vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v
// vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v
// vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v
case 0b10000:{// vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride
// vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v
// vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v
// vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v
// vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v
// vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v
// vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v
// vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v
uint32_t nfields = lsuArgs.nf + 1;
// registers per field: 1 when vlmul bit 2 is set, otherwise 1 << vlmul[1:0]
uint32_t emul = (states.vtype.vlmul >> 2) ? 1 : (1 << (states.vtype.vlmul & 0b11));
assert(nfields * emul <= 8);
for (uint32_t i = 0; i < states.vl; i++) {
// elements for which isMasked() returns true are skipped: no memory access, no trace entry
if (isMasked(vreg_file, 0, i, vmask))
continue;
for (uint32_t f = 0; f < nfields; f++) {
// fields of one element are adjacent in memory; field f goes to register vd + f * emul
uint64_t mem_addr = base_addr + (i * nfields + f) * vsewb;
uint64_t mem_data = 0;
core_->dcache_read(&mem_data, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
setVregData(states.vtype.vsew, vreg_file, vd + f * emul, i, mem_data);
}
}
break;
}
case 0b01000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v
uint32_t nreg = lsuArgs.nf + 1;
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl;
std::abort();
}
// address step comes from the instruction width, access size from vsewb;
// the assert at the top of the function requires width == vsew
uint32_t eew = lsuArgs.width & 0x3;
uint32_t stride = 1 << eew;
// element count covering nreg registers of VLENB bytes each
uint32_t vl = nreg * (VLENB / vsewb);
trace_data->vl = vl;
trace_data->vnf = 1;
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
uint64_t mem_addr = base_addr + i * stride;
uint64_t mem_data = 0;
core_->dcache_read(&mem_data, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
setVregData(states.vtype.vsew, vreg_file, vd, i, mem_data);
}
break;
}
case 0b01011: { // vlm.v
if (states.vtype.vsew != 0) {
std::cout << "vlm.v only supports SEW=8, but SEW was: " << states.vtype.vsew << std::endl;
std::abort();
}
// one mask bit per element: number of bytes needed to hold vl bits, rounded up
uint32_t vl = (states.vl + 7) / 8;
uint32_t stride = vsewb;
trace_data->vl = vl;
trace_data->vnf = 1;
for (uint32_t i = 0; i < vl; i++) {
// NOTE(review): isMasked is given a literal 1 instead of the instruction's vm bit,
// presumably to force the unmasked path - confirm against isMasked()
if (isMasked(vreg_file, 0, i, 1))
continue;
uint64_t mem_addr = base_addr + i * stride;
uint64_t mem_data = 0;
core_->dcache_read(&mem_data, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
setVregData(states.vtype.vsew, vreg_file, vd, i, mem_data);
}
break;
}
default:
std::cout << "Load vector - unsupported lumop: " << lumop << std::endl;
std::abort();
}
break;
}
case 0b10: {// strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v
// vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v
// vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v
// vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v
// vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v
// vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v
// vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v
// vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v
uint32_t nfields = lsuArgs.nf + 1;
uint32_t emul = (states.vtype.vlmul >> 2) ? 1 : (1 << (states.vtype.vlmul & 0b11));
assert(nfields * emul <= 8);
// byte stride between consecutive elements, read as a signed word from rs2
WordI stride = rs2_data.at(tid).i;
for (uint32_t i = 0; i < states.vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
for (uint32_t f = 0; f < nfields; f++) {
// element i starts at i * stride; its fields follow each other vsewb bytes apart
WordI offset = i * stride + f * vsewb;
uint64_t mem_addr = base_addr + offset;
uint64_t mem_data = 0;
core_->dcache_read(&mem_data, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
setVregData(states.vtype.vsew, vreg_file, vd + f * emul, i, mem_data);
}
}
break;
}
case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v
// vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v
// vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v
// vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v
// vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v
// vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v
// vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v
// vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v
case 0b11: {// indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v
// vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v
// vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v
// vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v
// vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v
// vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v
// vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v
// vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v
// ordered and unordered variants share this code path
uint32_t vs2 = instr.getSrcReg(1).idx;
uint32_t nfields = lsuArgs.nf + 1;
uint32_t eew = lsuArgs.width & 0x3;
uint32_t emul = states.vtype.vlmul >> 2 ? 1 : 1 << (states.vtype.vlmul & 0b11);
assert(nfields * emul <= 8);
for (uint32_t i = 0; i < states.vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
// per-element byte offset taken from index register vs2, read at width eew
uint64_t offset = getVregData(eew, vreg_file, vs2, i);
for (uint32_t f = 0; f < nfields; f++) {
uint64_t mem_addr = base_addr + offset + f * vsewb;
uint64_t mem_data = 0;
core_->dcache_read(&mem_data, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
setVregData(states.vtype.vsew, vreg_file, vd + f * emul, i, mem_data);
}
}
break;
}
default:
std::cout << "Load vector - unsupported mop: " << mop << std::endl;
std::abort();
}
}
// Executes one vector store instruction for thread `tid` of warp `wid`.
//
// The addressing mode (`mop`), unit-stride sub-opcode (`umop`), field count (`nf`),
// mask bit (`vm`) and width are taken from the instruction's IntrVlsArgs. Each element
// is fetched from the source register group (vs3 = source register 2) with getVregData,
// written with core_->dcache_write, and its {address, size} pair is appended to
// trace_data->mem_addrs[tid].
//
//   instr      : decoded instruction; must carry IntrVlsArgs (std::get throws otherwise).
//   wid, tid   : warp and thread indices selecting the vector state and register file.
//   rs1_data   : rs1_data[tid].i supplies the base address.
//   rs2_data   : rs2_data[tid].i supplies the byte stride (used by the strided mode only).
//   trace_data : receives vl, vnf and the list of accessed addresses.
//
// Aborts the process on an unsupported mop/sumop or a reserved whole-register count.
void store(const Instr &instr,
uint32_t wid,
uint32_t tid,
const std::vector<reg_data_t>& rs1_data,
const std::vector<reg_data_t>& rs2_data,
MemTraceData* trace_data) {
auto& states = vpu_states_.at(wid);
auto lsuArgs = std::get<IntrVlsArgs>(instr.getArgs());
uint32_t vmask = lsuArgs.vm;
uint32_t mop = lsuArgs.mop;
// element size in bytes, derived from the current vtype.vsew
uint32_t vsewb = 1 << states.vtype.vsew;
// only checked when asserts are enabled; every mode below sizes its accesses with vsewb
assert(lsuArgs.width == states.vtype.vsew && "vsew and width must match!");
uint32_t vs3 = instr.getSrcReg(2).idx;
auto& vreg_file = states.vreg_file.at(tid);
uint64_t base_addr = rs1_data.at(tid).i;
// keeps only address bits [31:2]: the low two bits and everything above bit 31 are cleared
base_addr &= 0xFFFFFFFC; // TODO: riscv-tests fix
// update trace data (the whole-register and mask cases below overwrite these)
trace_data->vl = states.vl;
trace_data->vnf = lsuArgs.nf + 1;
switch (mop) {
case 0b00: { // unit-stride
uint32_t sumop = lsuArgs.umop;
switch (sumop) {
case 0b00000: { // vse8.v, vse16.v, vse32.v, vse64.v
// nf > 0 is handled as well: each element then stores nfields adjacent fields
uint32_t nfields = lsuArgs.nf + 1;
// registers per field: 1 when vlmul bit 2 is set, otherwise 1 << vlmul[1:0]
uint32_t emul = states.vtype.vlmul >> 2 ? 1 : 1 << (states.vtype.vlmul & 0b11);
assert(nfields * emul <= 8);
for (uint32_t i = 0; i < states.vl; i++) {
// elements for which isMasked() returns true are skipped: no memory access, no trace entry
if (isMasked(vreg_file, 0, i, vmask))
continue;
for (uint32_t f = 0; f < nfields; f++) {
// fields of one element are adjacent in memory; field f comes from register vs3 + f * emul
uint64_t mem_addr = base_addr + (i * nfields + f) * vsewb;
uint64_t value = getVregData(states.vtype.vsew, vreg_file, vs3 + f * emul, i);
core_->dcache_write(&value, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
}
}
break;
}
case 0b01000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v
uint32_t nreg = lsuArgs.nf + 1;
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl;
std::abort();
}
uint32_t stride = vsewb;
// element count covering nreg registers of VLENB bytes each
uint32_t vl = nreg * (VLENB / vsewb);
trace_data->vl = vl;
trace_data->vnf = 1;
for (uint32_t i = 0; i < vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
uint64_t value = getVregData(states.vtype.vsew, vreg_file, vs3, i);
uint64_t mem_addr = base_addr + i * stride;
core_->dcache_write(&value, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
}
break;
}
case 0b01011: { // vsm.v
if (states.vtype.vsew != 0) {
std::cout << "vsm.v only supports EEW=8, but EEW was: " << states.vtype.vsew << std::endl;
std::abort();
}
// one mask bit per element: number of bytes needed to hold vl bits, rounded up
uint32_t vl = (states.vl + 7) / 8;
uint32_t stride = vsewb;
trace_data->vl = vl;
trace_data->vnf = 1;
for (uint32_t i = 0; i < vl; i++) {
// NOTE(review): isMasked is given a literal 1 instead of the instruction's vm bit,
// presumably to force the unmasked path - confirm against isMasked()
if (isMasked(vreg_file, 0, i, 1))
continue;
uint64_t mem_addr = base_addr + i * stride;
uint64_t value = getVregData(states.vtype.vsew, vreg_file, vs3, i);
core_->dcache_write(&value, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
}
break;
}
default:
std::cout << "Store vector - unsupported sumop: " << sumop << std::endl;
std::abort();
}
break;
}
case 0b10: {// strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v
// vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v
// vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v
// vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v
// vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v
// vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v
// vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v
// vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v
// byte stride between consecutive elements, read as a signed word from rs2
WordI stride = rs2_data.at(tid).i;
uint32_t nfields = lsuArgs.nf + 1;
uint32_t emul = states.vtype.vlmul >> 2 ? 1 : 1 << (states.vtype.vlmul & 0b11);
assert(nfields * emul <= 8);
for (uint32_t i = 0; i < states.vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
for (uint32_t f = 0; f < nfields; f++) {
// element i starts at i * stride; its fields follow each other vsewb bytes apart
WordI offset = i * stride + f * vsewb;
uint64_t mem_addr = base_addr + offset;
uint64_t value = getVregData(states.vtype.vsew, vreg_file, vs3 + f * emul, i);
core_->dcache_write(&value, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
}
}
break;
}
case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v
// vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v
// vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v
// vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v
// vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v
// vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v
// vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v
// vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v
case 0b11: {// indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v
// vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v
// vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v
// vsoxseg4ei8.v, vsoxseg4ei16.v, vsoxseg4ei32.v, vsoxseg4ei64.v
// vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v
// vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v
// vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v
// vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v
// ordered and unordered variants share this code path
uint32_t vs2 = instr.getSrcReg(1).idx;
uint32_t nfields = lsuArgs.nf + 1;
uint32_t eew = lsuArgs.width & 0x3;
uint32_t emul = states.vtype.vlmul >> 2 ? 1 : 1 << (states.vtype.vlmul & 0b11);
assert(nfields * emul <= 8);
for (uint32_t i = 0; i < states.vl; i++) {
if (isMasked(vreg_file, 0, i, vmask))
continue;
// per-element byte offset taken from index register vs2, read at width eew
uint64_t offset = getVregData(eew, vreg_file, vs2, i);
for (uint32_t f = 0; f < nfields; f++) {
uint64_t mem_addr = base_addr + offset + f * vsewb;
uint64_t value = getVregData(states.vtype.vsew, vreg_file, vs3 + f * emul, i);
core_->dcache_write(&value, mem_addr, vsewb);
trace_data->mem_addrs.at(tid).push_back({mem_addr, vsewb});
}
}
break;
}
default:
std::cout << "Store vector - unsupported mop: " << mop << std::endl;
std::abort();
}
}
void configure(const Instr &instr, uint32_t wid, uint32_t tid,
const std::vector<reg_data_t>& rs1_data,
const std::vector<reg_data_t>& rs2_data,
std::vector<reg_data_t>& rd_data,
ExeTraceData* trace_data) {
auto& states = vpu_states_.at(wid);
auto op_type = instr.getOpType();
auto instrArgs = instr.getArgs();
uint32_t rdest = instr.getDestReg().idx;
uint32_t rsrc0 = instr.getSrcReg(0).idx;
auto vset_type = std::get<VsetType>(op_type);
auto vsetArgs = std::get<IntrVsetArgs>(instrArgs);
uint32_t uimmsrc = vsetArgs.uimm;
uint32_t immsrc = vsetArgs.zimm;
uint32_t zimm;
if (vset_type != VsetType::VSETVL) {
zimm = immsrc;
} else {
zimm = rs2_data.at(tid).i;
}
uint32_t vlmul = zimm & mask_vlmul;
uint32_t vsew = (zimm >> shift_vsew) & mask_vsew;
uint32_t vta = (zimm >> shift_vta) & mask_vta;
uint32_t vma = (zimm >> shift_vma) & mask_vma;
uint32_t vlmul_neg = (vlmul >> 2);
uint32_t vlen_mul = vlmul_neg ? (VLENB >> (8 - vlmul)) : (VLENB << vlmul);
uint32_t vlmax = vlen_mul >> vsew;
uint32_t vill = ((1u << vsew) > XLENB) || (vlmax > VLEN);
uint32_t vl;
if (vset_type == VsetType::VSETIVLI) {
// vsetivli
vl = uimmsrc;
} else {
// vsetvli/vsetvl
vl = (rsrc0 != 0) ? rs1_data.at(tid).i : ((rdest != 0) ? vlmax : states.vl);
}
// clamp vl to vlmax
vl = std::min(vl, vlmax);
if (vill) {
vl = 0;
vma = 0;
vta = 0;
vsew = 0;
vlmul = 0;
}
DP(4, "Vset(i)vl(i) - vill: " << vill << " vma: " << vma << " vta: " << vta << " lmul: " << vlmul << " sew: " << vsew << " vl: " << vl << " vlmax: " << vlmax);
// update the vector unit state
states.vstart = 0;
states.vlmax = vlmax;
states.vtype.vill = vill;
states.vtype.vma = vma;
states.vtype.vta = vta;
states.vtype.vsew = vsew;
states.vtype.vlmul = vlmul;
states.vl = vl;
// return value is the new vl
rd_data.at(tid).i = vl;
// udpate trace data
trace_data->vl = states.vl;
trace_data->vlmul = 1;
trace_data->vpu_op = VpuOpType::VSET;
}
void execute(const Instr &instr, uint32_t wid, uint32_t tid,
const std::vector<reg_data_t>& rs1_data,
std::vector<reg_data_t>& rd_data,
ExeTraceData* trace_data) {
auto& states = vpu_states_.at(wid);
auto& vreg_file = states.vreg_file.at(tid);
auto op_type = instr.getOpType();
auto instrArgs = instr.getArgs();
uint32_t rdest = instr.getDestReg().idx;
uint32_t rsrc0 = instr.getSrcReg(0).idx;
uint32_t rsrc1 = instr.getSrcReg(1).idx;
VpuOpType vpu_op;
auto vop_type = std::get<VopType>(op_type);
auto vopArgs = std::get<IntrVopArgs>(instrArgs);
uint32_t vmask = vopArgs.vm;
uint32_t uimmsrc = vopArgs.imm;
uint32_t funct6 = vopArgs.funct6;
Word immsrc = sext<Word>(vopArgs.imm, width_reg);
switch (vop_type) {
case VopType::OPIVV: { // vector-vector
switch (funct6) {
case 0: { // vadd.vv
vector_op_vv<Add, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 2: { // vsub.vv
vector_op_vv<Sub, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 4: { // vminu.vv
vector_op_vv<Min, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 5: { // vmin.vv
vector_op_vv<Min, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 6: { // vmaxu.vv
vector_op_vv<Max, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 7: { // vmax.vv
vector_op_vv<Max, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 9: { // vand.vv
vector_op_vv<And, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 10: { // vor.vv
vector_op_vv<Or, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 11: { // vxor.vv
vector_op_vv<Xor, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 12: { // vrgather.vv
vector_op_vv_gather<uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, false, states.vlmax, vmask);
} break;
case 14: { // vrgatherei16.vv
vector_op_vv_gather<uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, true, states.vlmax, vmask);
} break;
case 16: { // vadc.vvm
vector_op_vv_carry<Adc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl);
} break;
case 17: { // vmadc.vv, vmadc.vvm
vector_op_vv_carry_out<Madc, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 18: { // vsbc.vvm
vector_op_vv_carry<Sbc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl);
} break;
case 19: { // vmsbc.vv, vmsbc.vvm
vector_op_vv_carry_out<Msbc, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 23: {
if (vmask) { // vmv.v.v
if (rsrc1 != 0) {
std::cout << "For vmv.v.v vs2 must contain v0." << std::endl;
std::abort();
}
vector_op_vv<Mv, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} else { // vmerge.vvm
vector_op_vv_merge<int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
}
} break;
case 24: { // vmseq.vv
vector_op_vv_mask<Eq, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 25: { // vmsne.vv
vector_op_vv_mask<Ne, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 26: { // vmsltu.vv
vector_op_vv_mask<Lt, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 27: { // vmslt.vv
vector_op_vv_mask<Lt, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 28: { // vmsleu.vv
vector_op_vv_mask<Le, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 29: { // vmsle.vv
vector_op_vv_mask<Le, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 30: { // vmsgtu.vv
vector_op_vv_mask<Gt, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 31: { // vmsgt.vv
vector_op_vv_mask<Gt, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 32: { // vsaddu.vv
vector_op_vv_sat<Sadd, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 33: { // vsadd.vv
vector_op_vv_sat<Sadd, int8_t, int16_t, int32_t, int64_t, __int128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 34: { // vssubu.vv
vector_op_vv_sat<Ssubu, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 35: { // vssub.vv
vector_op_vv_sat<Ssub, int8_t, int16_t, int32_t, int64_t, __int128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 37: { // vsll.vv
vector_op_vv<Sll, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vsmul.vv
vector_op_vv_sat<Smul, int8_t, int16_t, int32_t, int64_t, __int128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 40: { // vsrl.vv
vector_op_vv<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vsra.vv
vector_op_vv<SrlSra, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 42: { // vssrl.vv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_scale<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 43: { // vssra.vv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_scale<SrlSra, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 44: { // vnsrl.wv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_n<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 45: { // vnsra.wv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_n<SrlSra, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 46: { // vnclipu.wv
vector_op_vv_n<Clip, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 47: { // vnclip.wv
vector_op_vv_n<Clip, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 48: { // vwredsumu.vs
vector_op_vv_red_w<Add, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 49: { // vwredsum.vs
vector_op_vv_red_w<Add, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
default:
std::cout << "Unrecognised vector - vector instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPFVV: { // float vector-vector
switch (funct6) {
case 0: { // vfadd.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 2: { // vfsub.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 1: // vfredusum.vs - treated the same as vfredosum.vs
case 3: { // vfredosum.vs
vpu_op = VpuOpType::FMA_R;
vector_op_vv_red<Fadd, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 4: { // vfmin.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv<Fmin, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 5: { // vfredmin.vs
vpu_op = VpuOpType::FNCP_R;
vector_op_vv_red<Fmin, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 6: { // vfmax.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv<Fmax, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 7: { // vfredmax.vs
vpu_op = VpuOpType::FNCP_R;
vector_op_vv_red<Fmax, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 8: { // vfsgnj.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv<Fsgnj, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 9: { // vfsgnjn.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv<Fsgnjn, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 10: { // vfsgnjx.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv<Fsgnjx, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 16: { // vfmv.f.s
vpu_op = VpuOpType::FNCP;
WordI result = 0;
vector_op_scalar(&result, vreg_file, rsrc0, rsrc1, states.vtype.vsew);
DP(4, "Moved " << result << " from: " << +rsrc1 << " to: " << +rdest);
rd_data.at(tid).i = result;
} break;
case 18: {
vpu_op = VpuOpType::FCVT;
switch (rsrc0 >> 3) {
case 0b00: // vfcvt.xu.f.v, vfcvt.x.f.v, vfcvt.f.xu.v, vfcvt.f.x.v, vfcvt.rtz.xu.f.v, vfcvt.rtz.x.f.v
vector_op_vix<Fcvt, uint8_t, uint16_t, uint32_t, uint64_t>(rsrc0, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
break;
case 0b01: // vfwcvt.xu.f.v, vfwcvt.x.f.v, vfwcvt.f.xu.v, vfwcvt.f.x.v, vfwcvt.f.f.v, vfwcvt.rtz.xu.f.v, vfwcvt.rtz.x.f.v
vector_op_vix_w<Fcvt, uint8_t, uint16_t, uint32_t, uint64_t>(rsrc0, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
break;
case 0b10: { // vfncvt.xu.f.w, vfncvt.x.f.w, vfncvt.f.xu.w, vfncvt.f.x.w, vfncvt.f.f.w, vfncvt.rod.f.f.w, vfncvt.rtz.xu.f.w, vfncvt.rtz.x.f.w
uint32_t vxsat = 0; // saturation argument is unused
vector_op_vix_n<Fcvt, uint8_t, uint16_t, uint32_t, uint64_t>(rsrc0, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
break;
}
default:
std::cout << "Fcvt unsupported value for rsrc0: " << rsrc0 << std::endl;
std::abort();
}
} break;
case 19: { // vfsqrt.v, vfrsqrt7.v, vfrec7.v, vfclass.v
vpu_op = VpuOpType::FSQRT;
vector_op_vix<Funary1, uint8_t, uint16_t, uint32_t, uint64_t>(rsrc0, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 24: { // vmfeq.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv_mask<Feq, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 25: { // vmfle.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv_mask<Fle, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 27: { // vmflt.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv_mask<Flt, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 28: { // vmfne.vv
vpu_op = VpuOpType::FNCP;
vector_op_vv_mask<Fne, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 32: { // vfdiv.vv
vpu_op = VpuOpType::FDIV;
vector_op_vv<Fdiv, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 36: { // vfmul.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fmul, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 40: { // vfmadd.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fmadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vfnmadd.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fnmadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 42: { // vfmsub.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fmsub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 43: { // vfnmsub.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fnmsub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 44: { // vfmacc.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fmacc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 45: { // vfnmacc.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fnmacc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 46: { // vfmsac.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fmsac, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 47: { // vfnmsac.vv
vpu_op = VpuOpType::FMA;
vector_op_vv<Fnmsac, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 48: { // vfwadd.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
// Widening floating-point sum reductions: the ordered (funct6 51) and
// unordered (funct6 49) encodings fall through to the same implementation.
case 51: // vfwredosum.vs
case 49: { // vfwredusum.vs
vpu_op = VpuOpType::FMA;
vector_op_vv_red_wf<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 50: { // vfwsub.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 52: { // vfwadd.wv
vpu_op = VpuOpType::FMA;
vector_op_vv_wfv<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 54: { // vfwsub.wv
vpu_op = VpuOpType::FMA;
vector_op_vv_wfv<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 56: { // vfwmul.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fmul, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 60: { // vfwmacc.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fmacc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 61: { // vfwnmacc.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fnmacc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 62: { // vfwmsac.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fmsac, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 63: { // vfwnmsac.vv
vpu_op = VpuOpType::FMA;
vector_op_vv_w<Fnmsac, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
default:
std::cout << "Unrecognised float vector - vector instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPMVV: { // mask vector-vector
switch (funct6) {
case 0: { // vredsum.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Add, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 1: { // vredand.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<And, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 2: { // vredor.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Or, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 3: { // vredxor.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Xor, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 4: { // vredminu.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Min, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 5: { // vredmin.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Min, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 6: { // vredmaxu.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Max, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 7: { // vredmax.vs
vpu_op = VpuOpType::ARITH_R;
vector_op_vv_red<Max, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 8: { // vaaddu.vv
// NOTE(review): this case does not assign vpu_op, whereas the OPMVX counterpart
// (vaaddu.vx) runs under that group's default of VpuOpType::ARITH. Confirm the
// value vpu_op holds on entry to this switch is the intended one.
// A throwaway local is passed in the saturation slot, so states.vxsat is not modified.
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_sat<Aadd, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 9: { // vaadd.vv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_sat<Aadd, int8_t, int16_t, int32_t, int64_t, __int128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 10: { // vasubu.vv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_sat<Asub, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 11: { // vasub.vv
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vv_sat<Asub, int8_t, int16_t, int32_t, int64_t, __int128_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 16: { // vmv.x.s
WordI result = 0;
vector_op_scalar(&result, vreg_file, rsrc0, rsrc1, states.vtype.vsew);
DP(4, "Moved " << result << " from: " << +rsrc1 << " to: " << +rdest);
rd_data.at(tid).i = result;
} break;
case 18: { // vzext.vf8, vsext.vf8, vzext.vf4, vsext.vf4, vzext.vf2, vsext.vf2
// True when vlmul >= 4, i.e. bit 2 of the vlmul field is set
// (presumably the fractional-LMUL encodings -- confirm against the vtype decode).
bool negativeLmul = states.vtype.vlmul >> 2;
// (8 >> (8 - vlmul)) evaluates to 1, 2, 4 for vlmul = 5, 6, 7 and to 0 for vlmul = 4.
// That value is then shifted right by (4 - (rsrc0 >> 1)); the combination is
// rejected when nothing is left. Presumably rsrc0 >> 1 is 1/2/3 for the
// vf8/vf4/vf2 variants, which makes the second shift log2(vf) -- TODO confirm
// against the instruction encoding.
uint32_t illegalLmul = negativeLmul && !((8 >> (0x8 - states.vtype.vlmul)) >> (0x4 - (rsrc0 >> 1)));
if (illegalLmul) {
std::cout << "Lmul*vf<1/8 is not supported by vzext and vsext." << std::endl;
std::abort();
}
// rsrc0 is forwarded as the first argument so the callee can select the variant.
vector_op_vix_ext<Xunary0>(rsrc0, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 20: { // vid.v
vector_op_vid(vreg_file, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 23: { // vcompress.vm
vector_op_vv_compress<uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl);
} break;
case 24: { // vmandn.mm
vector_op_vv_mask<AndNot>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 25: { // vmand.mm
vector_op_vv_mask<And>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 26: { // vmor.mm
vector_op_vv_mask<Or>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 27: { // vmxor.mm
vector_op_vv_mask<Xor>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 28: { // vmorn.mm
vector_op_vv_mask<OrNot>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 29: { // vmnand.mm
vector_op_vv_mask<Nand>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 30: { // vmnor.mm
vector_op_vv_mask<Nor>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 31: { // vmxnor.mm
vector_op_vv_mask<Xnor>(vreg_file, rsrc0, rsrc1, rdest, states.vl);
} break;
case 32: { // vdivu.vv
vpu_op = VpuOpType::IDIV;
vector_op_vv<Div, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 33: { // vdiv.vv
vpu_op = VpuOpType::IDIV;
vector_op_vv<Div, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 34: { // vremu.vv
vpu_op = VpuOpType::IDIV;
vector_op_vv<Rem, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 35: { // vrem.vv
vpu_op = VpuOpType::IDIV;
vector_op_vv<Rem, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 36: { // vmulhu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Mulhu, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 37: { // vmul.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Mul, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 38: { // vmulhsu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Mulhsu, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vmulh.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Mulh, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vmadd.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Madd, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 43: { // vnmsub.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Nmsub, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 45: { // vmacc.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Macc, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 47: { // vnmsac.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv<Nmsac, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 48: { // vwaddu.vv
vector_op_vv_w<Add, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 49: { // vwadd.vv
vector_op_vv_w<Add, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 50: { // vwsubu.vv
vector_op_vv_w<Sub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 51: { // vwsub.vv
vector_op_vv_w<Sub, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 52: { // vwaddu.wv
vector_op_vv_wv<Add, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 53: { // vwadd.wv
vector_op_vv_wv<Add, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 54: { // vwsubu.wv
vector_op_vv_wv<Sub, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 55: { // vwsub.wv
vector_op_vv_wv<Sub, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 56: { // vwmulu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Mul, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 58: { // vwmulsu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Mulsu, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 59: { // vwmul.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Mul, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 60: { // vwmaccu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Macc, uint8_t, uint16_t, uint32_t, uint64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 61: { // vwmacc.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Macc, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 63: { // vwmaccsu.vv
vpu_op = VpuOpType::IMUL;
vector_op_vv_w<Maccsu, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
default:
std::cout << "Unrecognised mask vector - vector instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPIVI: { // vector-immediate
vpu_op = VpuOpType::ARITH;
switch (funct6) {
case 0: { // vadd.vi
vector_op_vix<Add, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 3: { // vrsub.vi
vector_op_vix<Rsub, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 9: { // vand.vi
vector_op_vix<And, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 10: { // vor.vi
vector_op_vix<Or, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 11: { // vxor.vi
vector_op_vix<Xor, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 12: { // vrgather.vi
vector_op_vix_gather<uint8_t, uint16_t, uint32_t, uint64_t>(uimmsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask);
} break;
case 14: { // vslideup.vi
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(uimmsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, 0, vmask, false);
} break;
case 15: { // vslidedown.vi
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(uimmsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask, false);
} break;
case 16: { // vadc.vim
vector_op_vix_carry<Adc, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl);
} break;
case 17: { // vmadc.vi, vmadc.vim
vector_op_vix_carry_out<Madc, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 23: { // vmv.v.i, vmerge.vim
// funct6 23 is shared by two instructions; vmask selects between them.
if (vmask) { // vmv.v.i
// The move form is only accepted when the vector source field (rsrc0 in this group) is 0.
if (rsrc0 != 0) {
std::cout << "For vmv.v.i vs2 must contain v0." << std::endl;
std::abort();
}
vector_op_vix<Mv, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} else { // vmerge.vim
vector_op_vix_merge<int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
}
} break;
case 24: { // vmseq.vi
vector_op_vix_mask<Eq, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 25: { // vmsne.vi
vector_op_vix_mask<Ne, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 26: { // vmsltu.vi
vector_op_vix_mask<Lt, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 27: { // vmslt.vi
vector_op_vix_mask<Lt, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 28: { // vmsleu.vi
vector_op_vix_mask<Le, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 29: { // vmsle.vi
vector_op_vix_mask<Le, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 30: { // vmsgtu.vi
vector_op_vix_mask<Gt, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 31: { // vmsgt.vi
vector_op_vix_mask<Gt, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 32: { // vsaddu.vi
vector_op_vix_sat<Sadd, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 33: { // vsadd.vi
vector_op_vix_sat<Sadd, int8_t, int16_t, int32_t, int64_t, __int128_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 37: { // vsll.vi
// Dispatch on the unsigned element types, exactly as vsll.vx does. A logical
// left shift produces the same bit pattern for either signedness, and using
// unsigned operands avoids left-shifting negative signed values.
vector_op_vix<Sll, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vmv1r.v, vmv2r.v, vmv4r.v, vmv8r.v
// Register count is encoded in the low three bits of the immediate, minus one.
uint32_t nreg = (immsrc & 0b111) + 1;
// Only 1, 2, 4 and 8 registers are accepted; any other count aborts.
if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) {
std::cout << "Reserved value for nreg: " << nreg << std::endl;
std::abort();
}
// The move ignores states.vl and uses an element count derived from the
// register count instead: nreg * VLENB shifted right by vsew
// (presumably bytes -> elements, with vsew being log2 of the element size in bytes -- confirm).
uint32_t vl = (nreg * VLENB) >> states.vtype.vsew;
// Record the overridden element count in trace_data.
trace_data->vl = vl;
vector_op_vv<Mv, int8_t, int16_t, int32_t, int64_t>(vreg_file, rsrc0, rsrc1, rdest, states.vtype.vsew, vl, vmask);
} break;
case 40: { // vsrl.vi
vector_op_vix<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vsra.vi
vector_op_vix<SrlSra, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 42: { // vssrl.vi
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_scale<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 43: { // vssra.vi
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_scale<SrlSra, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 44: { // vnsrl.wi
// A throwaway local is passed in the saturation slot, so states.vxsat is not modified.
uint32_t vxsat = 0; // saturation is not relevant for this operation
// The literal 2 sits in the argument slot where vnclipu.wi/vnclip.wi pass
// states.vxrm, so this narrowing shift does not follow the CSR rounding mode.
// Presumably 2 is the round-down (truncate) encoding -- confirm.
vector_op_vix_n<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 45: { // vnsra.wi
// A throwaway local is passed in the saturation slot, so states.vxsat is not modified.
uint32_t vxsat = 0; // saturation is not relevant for this operation
// Same functor as vnsrl.wi, instantiated with signed element types. The literal 2
// sits in the argument slot where vnclipu.wi/vnclip.wi pass states.vxrm;
// presumably it is the round-down (truncate) encoding -- confirm.
vector_op_vix_n<SrlSra, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 46: { // vnclipu.wi
vector_op_vix_n<Clip, uint8_t, uint16_t, uint32_t, uint64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 47: { // vnclip.wi
vector_op_vix_n<Clip, int8_t, int16_t, int32_t, int64_t>(immsrc, vreg_file, rsrc0, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
default:
std::cout << "Unrecognised vector - immidiate instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPIVX: { // vector-scalar
vpu_op = VpuOpType::ARITH;
auto rs1_value = rs1_data.at(tid).i;
switch (funct6) {
case 0: { // vadd.vx
vector_op_vix<Add, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 2: { // vsub.vx
vector_op_vix<Sub, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 3: { // vrsub.vx
vector_op_vix<Rsub, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 4: { // vminu.vx
vector_op_vix<Min, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 5: { // vmin.vx
vector_op_vix<Min, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 6: { // vmaxu.vx
vector_op_vix<Max, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 7: { // vmax.vx
vector_op_vix<Max, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 9: { // vand.vx
vector_op_vix<And, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 10: { // vor.vx
vector_op_vix<Or, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 11: { // vxor.vx
vector_op_vix<Xor, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 12: { // vrgather.vx
vector_op_vix_gather<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask);
} break;
case 14: { // vslideup.vx
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, 0, vmask, false);
} break;
case 15: { // vslidedown.vx
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask, false);
} break;
case 16: { // vadc.vxm
vector_op_vix_carry<Adc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl);
} break;
case 17: { // vmadc.vx, vmadc.vxm
vector_op_vix_carry_out<Madc, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 18: { // vsbc.vxm
vector_op_vix_carry<Sbc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl);
} break;
case 19: { // vmsbc.vx, vmsbc.vxm
vector_op_vix_carry_out<Msbc, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 23: { // vmv.v.x, vmerge.vxm
// funct6 23 is shared by two instructions; vmask selects between them.
if (vmask) { // vmv.v.x
// The move form is only accepted when the vs2 field (rsrc1 in this group) is 0.
if (rsrc1 != 0) {
std::cout << "For vmv.v.x vs2 must contain v0." << std::endl;
std::abort();
}
vector_op_vix<Mv, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} else { // vmerge.vxm
vector_op_vix_merge<int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
}
} break;
case 24: { // vmseq.vx
vector_op_vix_mask<Eq, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 25: { // vmsne.vx
vector_op_vix_mask<Ne, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 26: { // vmsltu.vx
vector_op_vix_mask<Lt, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 27: { // vmslt.vx
vector_op_vix_mask<Lt, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 28: { // vmsleu.vx
vector_op_vix_mask<Le, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 29: { // vmsle.vx
vector_op_vix_mask<Le, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 30: { // vmsgtu.vx
vector_op_vix_mask<Gt, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 31: { // vmsgt.vx
vector_op_vix_mask<Gt, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 32: { // vsaddu.vx
vector_op_vix_sat<Sadd, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 33: { // vsadd.vx
vector_op_vix_sat<Sadd, int8_t, int16_t, int32_t, int64_t, __int128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 34: { // vssubu.vx
vector_op_vix_sat<Ssubu, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 35: { // vssub.vx
vector_op_vix_sat<Ssub, int8_t, int16_t, int32_t, int64_t, __int128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, states.vxsat);
} break;
case 37: { // vsll.vx
vector_op_vix<Sll, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vsmul.vx
vector_op_vix_sat<Smul, int8_t, int16_t, int32_t, int64_t, __int128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 40: { // vsrl.vx
vector_op_vix<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vsra.vx
vector_op_vix<SrlSra, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 42: { // vssrl.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_scale<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 43: { // vssra.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_scale<SrlSra, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 44: { // vnsrl.wx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_n<SrlSra, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 45: { // vnsra.wx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_n<SrlSra, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, 2, vxsat);
} break;
case 46: { // vnclipu.wx
vector_op_vix_n<Clip, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
case 47: { // vnclip.wx
vector_op_vix_n<Clip, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, states.vxsat);
} break;
default:
std::cout << "Unrecognised vector - scalar instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPFVF: { // float vector-scalar
auto rs1_value = rs1_data.at(tid).i;
switch (funct6) {
case 0: { // vfadd.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 2: { // vfsub.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 4: { // vfmin.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix<Fmin, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 6: { // vfmax.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix<Fmax, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 8: { // vfsgnj.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix<Fsgnj, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 9: { // vfsgnjn.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix<Fsgnjn, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 10: { // vfsgnjx.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix<Fsgnjx, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 14: { // vfslide1up.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, 0, vmask, true);
} break;
case 15: { // vfslide1down.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask, true);
} break;
case 16: { // vfmv.s.f
vpu_op = VpuOpType::FNCP;
// The encoding is only accepted when the vs2 field (rsrc1) is 0.
if (rsrc1 != 0) {
std::cout << "For vfmv.s.f vs2 must contain v0." << std::endl;
std::abort();
}
// Clamp the element count to at most 1 (a vl of 0 stays 0), so the move
// below is issued for no more than a single element.
uint32_t vl = std::min(states.vl, (uint32_t)1);
// Record the clamped element count in trace_data.
trace_data->vl = vl;
vector_op_vix<Mv, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, vl, vmask);
} break;
case 24: { // vmfeq.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Feq, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 23: { // vfmv.v.f, vfmerge.vfm
vpu_op = VpuOpType::FNCP;
// funct6 23 is shared by two instructions; vmask selects between them.
if (vmask) { // vfmv.v.f
// The move form is only accepted when the vs2 field (rsrc1) is 0.
if (rsrc1 != 0) {
std::cout << "For vfmv.v.f vs2 must contain v0." << std::endl;
std::abort();
}
vector_op_vix<Mv, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} else { // vfmerge.vfm
vector_op_vix_merge<int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
}
} break;
case 25: { // vmfle.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Fle, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 27: { // vmflt.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Flt, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 28: { // vmfne.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Fne, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 29: { // vmfgt.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Fgt, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 31: { // vmfge.vf
vpu_op = VpuOpType::FNCP;
vector_op_vix_mask<Fge, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 32: { // vfdiv.vf
vpu_op = VpuOpType::FDIV;
vector_op_vix<Fdiv, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 33: { // vfrdiv.vf
vpu_op = VpuOpType::FDIV;
vector_op_vix<Frdiv, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 36: { // vfmul.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fmul, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vfrsub.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Frsub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 40: { // vfmadd.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fmadd, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vfnmadd.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fnmadd, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 42: { // vfmsub.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fmsub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 43: { // vfnmsub.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fnmsub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 44: { // vfmacc.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fmacc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 45: { // vfnmacc.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fnmacc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 46: { // vfmsac.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fmsac, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 47: { // vfnmsac.vf
vpu_op = VpuOpType::FMA;
vector_op_vix<Fnmsac, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 48: { // vfwadd.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 50: { // vfwsub.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 52: { // vfwadd.wf
vpu_op = VpuOpType::FMA;
// Unlike vfwadd.vf, which passes rs1_value through unchanged, the scalar is
// converted with rv_ftod before the call.
// NOTE(review): rv_ftod presumably widens a single-precision bit pattern to
// double precision; if so this path is only correct for SEW=32 -- confirm.
uint64_t src1_d = rv_ftod(rs1_value);
vector_op_vix_wx<Fadd, uint8_t, uint16_t, uint32_t, uint64_t>(src1_d, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 54: { // vfwsub.wf
vpu_op = VpuOpType::FMA;
// Unlike vfwsub.vf, which passes rs1_value through unchanged, the scalar is
// converted with rv_ftod before the call.
// NOTE(review): rv_ftod presumably widens a single-precision bit pattern to
// double precision; if so this path is only correct for SEW=32 -- confirm.
uint64_t src1_d = rv_ftod(rs1_value);
vector_op_vix_wx<Fsub, uint8_t, uint16_t, uint32_t, uint64_t>(src1_d, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 56: { // vfwmul.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fmul, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 60: { // vfwmacc.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fmacc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 61: { // vfwnmacc.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fnmacc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 62: { // vfwmsac.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fmsac, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 63: { // vfwnmsac.vf
vpu_op = VpuOpType::FMA;
vector_op_vix_w<Fnmsac, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
default:
std::cout << "Unrecognised float vector - scalar instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
case VopType::OPMVX: { // vector-scalar
vpu_op = VpuOpType::ARITH;
auto rs1_value = rs1_data.at(tid).i;
switch (funct6) {
case 8: { // vaaddu.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_sat<Aadd, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 9: { // vaadd.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_sat<Aadd, int8_t, int16_t, int32_t, int64_t, __int128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 10: { // vasubu.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_sat<Asub, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 11: { // vasub.vx
uint32_t vxsat = 0; // saturation is not relevant for this operation
vector_op_vix_sat<Asub, int8_t, int16_t, int32_t, int64_t, __int128_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask, states.vxrm, vxsat);
} break;
case 14: { // vslide1up.vx
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, 0, vmask, true);
} break;
case 15: { // vslide1down.vx
vector_op_vix_slide<uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, states.vlmax, vmask, true);
} break;
case 16: { // vmv.s.x
if (rsrc1 != 0) {
std::cout << "For vmv.s.x vs2 must contain v0." << std::endl;
std::abort();
}
uint32_t vl = std::min(states.vl, (uint32_t)1);
trace_data->vl = vl;
vector_op_vix<Mv, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, vl, vmask);
} break;
case 32: { // vdivu.vx
vpu_op = VpuOpType::IDIV;
vector_op_vix<Div, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 33: { // vdiv.vx
vpu_op = VpuOpType::IDIV;
vector_op_vix<Div, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 34: { // vremu.vx
vpu_op = VpuOpType::IDIV;
vector_op_vix<Rem, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 35: { // vrem.vx
vpu_op = VpuOpType::IDIV;
vector_op_vix<Rem, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 36: { // vmulhu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Mulhu, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 37: { // vmul.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Mul, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 38: { // vmulhsu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Mulhsu, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 39: { // vmulh.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Mulh, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 41: { // vmadd.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Madd, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 43: { // vnmsub.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Nmsub, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 45: { // vmacc.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Macc, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 47: { // vnmsac.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix<Nmsac, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 48: { // vwaddu.vx
vector_op_vix_w<Add, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 49: { // vwadd.vx
vector_op_vix_w<Add, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 50: { // vwsubu.vx
vector_op_vix_w<Sub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 51: { // vwsub.vx
vector_op_vix_w<Sub, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 52: { // vwaddu.wx
vector_op_vix_wx<Add, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 53: { // vwadd.wx
uint32_t vsew_bits = 1 << (3 + states.vtype.vsew);
Word src1_ext = sext(rs1_value, vsew_bits);
vector_op_vix_wx<Add, int8_t, int16_t, int32_t, int64_t>(src1_ext, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 54: { // vwsubu.wx
vector_op_vix_wx<Sub, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 55: { // vwsub.wx
uint32_t vsew_bits = 1 << (3 + states.vtype.vsew);
Word src1_ext = sext(rs1_value, vsew_bits);
vector_op_vix_wx<Sub, int8_t, int16_t, int32_t, int64_t>(src1_ext, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 56: { // vwmulu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Mul, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 58: { // vwmulsu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Mulsu, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 59: { // vwmul.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Mul, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 60: { // vwmaccu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Macc, uint8_t, uint16_t, uint32_t, uint64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 61: { // vwmacc.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Macc, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 62: { // vwmaccus.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Maccus, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
case 63: { // vwmaccsu.vx
vpu_op = VpuOpType::IMUL;
vector_op_vix_w<Maccsu, int8_t, int16_t, int32_t, int64_t>(rs1_value, vreg_file, rsrc1, rdest, states.vtype.vsew, states.vl, vmask);
} break;
default:
std::cout << "Unrecognised vector - scalar instruction funct6: " << funct6 << std::endl;
std::abort();
}
} break;
default:
std::abort();
}
// update trace data
trace_data->vl = states.vl;
trace_data->vlmul = 1;
trace_data->vpu_op = vpu_op;
}
bool get_csr(uint32_t addr, uint32_t wid, uint32_t tid, Word* value) {
__unused (tid);
switch (addr) {
case VX_CSR_VSTART:
*value = vpu_states_.at(wid).vstart;
return true;
case VX_CSR_VXSAT:
*value = vpu_states_.at(wid).vxsat;
return true;
case VX_CSR_VXRM:
*value = vpu_states_.at(wid).vxrm;
return true;
case VX_CSR_VCSR:
*value = (vpu_states_.at(wid).vxrm << 1) | vpu_states_.at(wid).vxsat;
return true;
case VX_CSR_VL:
*value = vpu_states_.at(wid).vl;
return true;
case VX_CSR_VTYPE:
*value = vpu_states_.at(wid).vtype.value;
return true;
case VX_CSR_VLENB:
*value = VLENB;
return true;
default:
return false;
}
}
// Writes `value` to the vector CSR selected by `addr` for warp `wid`.
// Returns true when the CSR was updated and false when `addr` is not a
// vector CSR handled here. `tid` is not used.
// vl, vtype and vlenb are treated as read-only: writing them is a fatal
// error that prints a diagnostic and aborts the simulation.
bool set_csr(uint32_t addr, uint32_t wid, uint32_t tid, Word value) {
  __unused (tid);
  switch (addr) {
  case VX_CSR_VSTART:
    vpu_states_.at(wid).vstart = value;
    return true;
  case VX_CSR_VXSAT:
    // only the low bit is kept
    vpu_states_.at(wid).vxsat = value & 0b1;
    return true;
  case VX_CSR_VXRM:
    // only the low two bits are kept
    vpu_states_.at(wid).vxrm = value & 0b11;
    return true;
  case VX_CSR_VCSR: {
    // vcsr carries vxsat in bit 0 and vxrm in bits 2:1
    auto& states = vpu_states_.at(wid);
    states.vxsat = value & 0b1;
    states.vxrm = (value >> 1) & 0b11;
    return true;
  }
  case VX_CSR_VL: // read only
  case VX_CSR_VTYPE:
  case VX_CSR_VLENB:
    // report which CSR was hit before aborting, like the other abort sites in this file
    std::cout << "Write to read-only vector CSR: 0x" << std::hex << addr << std::dec << std::endl;
    std::abort();
  default:
    return false;
  }
}
// Returns a read-only reference to the perf_stats_ member.
const PerfStats& perf_stats() const {
return perf_stats_;
}
private:
// Record type stored in the pending_reqs_ hash table.
struct pending_req_t {
instr_trace_t* trace; // instruction trace this request belongs to (presumably non-owning — TODO confirm)
uint32_t count; // per-request counter; presumably the number of outstanding responses — TODO confirm
};
// View of the vtype CSR: the same 32 bits can be accessed either through the
// named bit-fields or as the raw `value` word (which is what get_csr returns).
// The bit-field widths add up to 32 bits.
// NOTE(review): the exact bit position of each field depends on the compiler's
// bit-field allocation order — confirm it matches the intended CSR layout.
union vtype_t {
struct {
uint32_t vlmul : 3; // vector register group multiplier
uint32_t vsew : 3; // vector element width
uint32_t vta : 1; // vector tail agnostic
uint32_t vma : 1; // vector mask agnostic
uint32_t reserved: 23; // unused bits between vma and vill
uint32_t vill : 1; // illegal vtype
};
uint32_t value; // raw 32-bit view of all the fields above
};
struct vpu_states_t {
std::vector<VRF_t> vreg_file;
uint32_t vstart;
uint32_t vxsat;
uint32_t vxrm;
uint32_t vl;
vtype_t vtype;
uint32_t vlenb;
uint32_t vlmax;
vpu_states_t(uint32_t num_threads)
: vreg_file(num_threads, std::vector(MAX_NUM_REGS, std::vector<Byte>(VLENB, 0)))
, vstart(0)
, vxsat(0)
, vxrm(0)
, vl(0)
, vtype({0, 0, 0, 0, 0, 0})
, vlenb(VLENB)
, vlmax(0)
{}
void reset() {
for (auto& reg_file : this->vreg_file) {
for (auto& reg : reg_file) {
for (auto& elm : reg) {
#ifndef NDEBUG
elm = 0;
#else
elm = std::rand();
#endif
}
}
}
}
};
// Data members of the implementation object.
VecUnit* simobject_; // the VecUnit facade this Impl belongs to (presumably the `this` passed at construction — TODO confirm)
Core* core_; // core handed in at construction (presumably the owning core — TODO confirm)
std::vector<vpu_states_t> vpu_states_; // vector state, indexed by warp id (see get_csr/set_csr)
uint32_t num_lanes_; // NOTE(review): initialized outside this view; presumably the number of vector lanes — confirm
HashTable<pending_req_t> pending_reqs_; // table of pending_req_t records
PerfStats perf_stats_; // statistics exposed through perf_stats()
};
///////////////////////////////////////////////////////////////////////////////
// Constructs the vector unit as a SimObject named `name` in context `ctx`.
// Inputs and Outputs are each initialized with ISSUE_WIDTH entries bound to
// this object (presumably one port per issue slot — TODO confirm), and the
// private Impl is heap-allocated here; it is released in the destructor.
VecUnit::VecUnit(const SimContext& ctx,
const char* name,
const Arch& arch,
Core* core)
: SimObject<VecUnit>(ctx, name)
, Inputs(ISSUE_WIDTH, this)
, Outputs(ISSUE_WIDTH, this)
, impl_(new Impl(this, arch, core))
{}
// Releases the Impl object allocated by the constructor.
VecUnit::~VecUnit() {
delete impl_;
}
// Forwards the reset request to the implementation object.
void VecUnit::reset() {
impl_->reset();
}
// Forwards the tick to the implementation object.
void VecUnit::tick() {
impl_->tick();
}
// Reads vector CSR `addr` of warp `wid` into `*value` by delegating to Impl::get_csr.
// Returns true if the CSR is handled by the vector unit, false otherwise.
bool VecUnit::get_csr(uint32_t addr, uint32_t wid, uint32_t tid, Word* value) {
return impl_->get_csr(addr, wid, tid, value);
}
// Writes `value` to vector CSR `addr` of warp `wid` by delegating to Impl::set_csr.
// Returns true if the CSR was written, false if `addr` is not handled by the vector unit.
// Note that Impl::set_csr aborts the process on writes to the read-only CSRs (vl, vtype, vlenb).
bool VecUnit::set_csr(uint32_t addr, uint32_t wid, uint32_t tid, Word value) {
return impl_->set_csr(addr, wid, tid, value);
}
// Forwards a vector load for thread `tid` of warp `wid` to the implementation,
// passing all arguments through unchanged.
void VecUnit::load(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, MemTraceData* trace_data) {
impl_->load(instr, wid, tid, rs1_data, rs2_data, trace_data);
}
// Forwards a vector store for thread `tid` of warp `wid` to the implementation,
// passing all arguments through unchanged.
void VecUnit::store(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, MemTraceData* trace_data) {
impl_->store(instr, wid, tid, rs1_data, rs2_data, trace_data);
}
// Forwards a vector configuration instruction to the implementation, passing all
// arguments through unchanged. `rd_data` is taken by non-const reference
// (presumably it receives the result written to rd — TODO confirm).
void VecUnit::configure(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, std::vector<reg_data_t>& rd_data, ExeTraceData* trace_data) {
impl_->configure(instr, wid, tid, rs1_data, rs2_data, rd_data, trace_data);
}
// Forwards a vector instruction for thread `tid` of warp `wid` to the implementation,
// passing all arguments through unchanged. `rd_data` is taken by non-const reference
// (presumably for scalar results — TODO confirm); Impl::execute records vl, vlmul
// and the op type in `trace_data`.
void VecUnit::execute(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, std::vector<reg_data_t>& rd_data, ExeTraceData* trace_data) {
impl_->execute(instr, wid, tid, rs1_data, rd_data, trace_data);
}
// Returns a read-only reference to the statistics held by the implementation object.
const VecUnit::PerfStats& VecUnit::perf_stats() const {
return impl_->perf_stats();
}