// vortex/sim/simx/emulator.cpp
// 2025-02-09 17:53:09 -08:00
//
// 736 lines
// 22 KiB
// C++

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <assert.h>
#include <util.h>
#include "emulator.h"
#include "instr_trace.h"
#include "instr.h"
#include "dcrs.h"
#include "core.h"
#include "socket.h"
#include "cluster.h"
#include "processor_impl.h"
#include "local_mem.h"
using namespace vortex;
// Allocates the per-warp register storage, sized from the architecture:
// one integer and one floating-point register file per thread, each with
// MAX_NUM_REGS entries. The instruction uuid counter starts at zero.
Emulator::warp_t::warp_t(const Arch& arch)
: ireg_file(arch.num_threads(), std::vector<Word>(MAX_NUM_REGS))
, freg_file(arch.num_threads(), std::vector<uint64_t>(MAX_NUM_REGS))
#ifdef EXT_V_ENABLE
// vector register file: MAX_NUM_REGS registers of VLEN / 8 bytes each
, vreg_file(MAX_NUM_REGS, std::vector<Byte>(VLEN / 8))
#endif
, uuid(0)
{}
void Emulator::warp_t::clear(uint64_t startup_addr) {
this->PC = startup_addr;
this->tmask.reset();
this->uuid = 0;
this->fcsr = 0;
for (auto& reg_file : this->ireg_file) {
for (auto& reg : reg_file) {
#ifndef NDEBUG
reg = 0;
#else
reg = std::rand();
#endif
}
reg_file.at(0) = 0; // r0 = 0
}
for (auto& reg_file : this->freg_file) {
for (auto& reg : reg_file) {
#ifndef NDEBUG
reg = 0;
#else
reg = std::rand();
#endif
}
}
#ifdef EXT_V_ENABLE
for (auto& reg_file : this->vreg_file) {
for (auto& reg : reg_file) {
#ifndef NDEBUG
reg = 0;
#else
reg = std::rand();
#endif
}
}
this->vtype = {0, 0, 0, 0, 0};
this->vl = 0;
this->vlmax = 0;
#endif
}
///////////////////////////////////////////////////////////////////////////////
// Builds the emulator for one core.
//
// Allocates one warp_t per architectural warp and one barrier mask per
// architectural barrier, seeds the C random generator with a fixed value (50)
// and finishes with clear() so the emulator starts from its reset state.
Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
  : arch_(arch)
  , dcrs_(dcrs)
  , core_(core)
  , warps_(arch.num_warps(), arch)
  , barriers_(arch.num_barriers(), 0)
  , ipdom_size_(arch.num_threads()-1)
  // [TBC] The trade-off between scratchpad size and performance has not been
  // evaluated yet. The scratchpad is assumed to be large enough to hold the
  // input tiles of one output tile; a later version should settle on a
  // properly justified fixed size.
  , scratchpad(std::vector<Word>(32 * 32 * 32768))
#ifdef EXT_V_ENABLE
  , vec_unit_(core->vec_unit())
  , csrs_(arch.num_warps())
#endif
{
  // Fixed seed: must happen before clear(), which may call std::rand().
  std::srand(50);
#ifdef EXT_V_ENABLE
  // csrs_ already holds one entry per warp; give each one a slot per thread.
  for (auto& warp_csrs : csrs_) {
    warp_csrs.resize(arch.num_threads());
  }
#endif
  this->clear();
}
// Prints whatever console text is still sitting in the per-thread buffers.
Emulator::~Emulator() {
  cout_flush();
}
void Emulator::clear() {
uint64_t startup_addr = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
#if (XLEN == 64)
startup_addr |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32);
#endif
uint64_t startup_arg = dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG0);
#if (XLEN == 64)
startup_arg |= (uint64_t(dcrs_.base_dcrs.read(VX_DCR_BASE_STARTUP_ARG1)) << 32);
#endif
for (auto& warp : warps_) {
warp.clear(startup_addr);
}
for (auto& barrier : barriers_) {
barrier.reset();
}
#ifdef EXT_V_ENABLE
vec_unit_->reset();
#endif
csr_mscratch_ = startup_arg;
stalled_warps_.reset();
active_warps_.reset();
// activate first warp and thread
active_warps_.set(0);
warps_[0].tmask.set(0);
wspawn_.valid = false;
for (auto& reg : scratchpad) {
reg = 0;
}
}
// Binds `ram` to the memory unit over the whole addressable range:
// 39 bits (SV39) when XLEN == 64, 32 bits otherwise.
// `ram` is dereferenced unconditionally and therefore must not be null.
void Emulator::attach_ram(RAM* ram) {
#if (XLEN == 64)
  constexpr uint64_t kLastAddr = 0x7FFFFFFFFF; // 39bit SV39
#else
  constexpr uint64_t kLastAddr = 0xFFFFFFFF;
#endif
  mmu_.attach(*ram, 0, kLastAddr);
}
// Executes one instruction for the next ready warp and returns its trace.
//
// In order: applies a pending wspawn once only one warp remains active, picks
// the lowest-numbered warp that is active and not stalled, fetches a 32-bit
// instruction word at that warp's PC, decodes it and executes it.
//
// Returns nullptr when no warp is ready. Otherwise returns a trace object
// allocated with `new`; nothing in this function frees it, so the caller is
// presumably responsible for its lifetime (NOTE(review): confirm ownership).
// Aborts the process when the fetched word does not decode.
instr_trace_t* Emulator::step() {
int scheduled_warp = -1;
// process pending wspawn
// (deferred until warp 0 is the only active warp; warps 1..num_warps-1 are
// then started at nextPC with only thread 0 enabled, and warp 0 is unstalled)
if (wspawn_.valid && active_warps_.count() == 1) {
DP(3, "*** Activate " << (wspawn_.num_warps-1) << " warps at PC: " << std::hex << wspawn_.nextPC << std::dec);
for (uint32_t i = 1; i < wspawn_.num_warps; ++i) {
auto& warp = warps_.at(i);
warp.PC = wspawn_.nextPC;
warp.tmask.set(0);
active_warps_.set(i);
}
wspawn_.valid = false;
stalled_warps_.reset(0);
}
// find next ready warp
// (fixed priority: the lowest warp id that is active and not stalled wins)
for (size_t wid = 0, nw = arch_.num_warps(); wid < nw; ++wid) {
bool warp_active = active_warps_.test(wid);
bool warp_stalled = stalled_warps_.test(wid);
if (warp_active && !warp_stalled) {
scheduled_warp = wid;
break;
}
}
if (scheduled_warp == -1)
return nullptr;
// state of the scheduled warp; an active warp must have at least one thread enabled
auto& warp = warps_.at(scheduled_warp);
assert(warp.tmask.any());
#ifndef NDEBUG
// generate unique universal instruction ID
// (upper 32 bits: global warp id, lower 32 bits: per-warp instruction counter)
uint32_t instr_uuid = warp.uuid++;
uint32_t g_wid = core_->id() * arch_.num_warps() + scheduled_warp;
uint64_t uuid = (uint64_t(g_wid) << 32) | instr_uuid;
#else
uint64_t uuid = 0;
#endif
DP(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask=" << ThreadMaskOS(warp.tmask, arch_.num_threads())
<< ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")");
// Fetch
uint32_t instr_code = 0;
this->icache_read(&instr_code, warp.PC, sizeof(uint32_t));
// Decode
auto instr = this->decode(instr_code);
if (!instr) {
std::cout << "Error: invalid instruction 0x" << std::hex << instr_code << ", at PC=0x" << warp.PC << " (#" << std::dec << uuid << ")" << std::endl;
std::abort();
}
DP(1, "Instr 0x" << std::hex << instr_code << ": " << std::dec << *instr);
// Create trace
auto trace = new instr_trace_t(uuid, arch_);
// Execute
this->execute(*instr, scheduled_warp, trace);
// Debug dump (level 5): one row per register index, integer values of every
// thread first, then '|', then the floating-point values of every thread.
DP(5, "Register state:");
for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
DPN(5, " %r" << std::setfill('0') << std::setw(2) << i << ':' << std::hex);
// Integer register file
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
DPN(5, ' ' << std::setfill('0') << std::setw(XLEN/4) << warp.ireg_file.at(j).at(i) << std::setfill(' ') << ' ');
}
DPN(5, '|');
// Floating point register file
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
DPN(5, ' ' << std::setfill('0') << std::setw(16) << warp.freg_file.at(j).at(i) << std::setfill(' ') << ' ');
}
DPN(5, std::dec << std::endl);
}
return trace;
}
// Returns true while at least one warp is still marked active.
bool Emulator::running() const {
return active_warps_.any();
}
int Emulator::get_exitcode() const {
return warps_.at(0).ireg_file.at(0).at(3);
}
// Marks warp `wid` as stalled so step() will not schedule it.
// The warp must not already be stalled (checked by assert only).
void Emulator::suspend(uint32_t wid) {
  assert(!stalled_warps_.test(wid));
  stalled_warps_.set(wid, true);
}
// Clears the stalled state of warp `wid`.
// The special value 0xffffffff un-stalls every warp at once; for any other
// value the warp is expected to be stalled (checked by assert only).
void Emulator::resume(uint32_t wid) {
  if (wid == 0xffffffff) {
    stalled_warps_.reset();
    return;
  }
  assert(stalled_warps_.test(wid));
  stalled_warps_.reset(wid);
}
// Records a warp-spawn request to be applied later by step().
//
// `num_warps` is clamped to the architectural warp count. Returns true when
// there is nothing to do (fewer than two warps requested while exactly one
// warp is active); otherwise stores the request and returns false.
bool Emulator::wspawn(uint32_t num_warps, Word nextPC) {
  const uint32_t max_warps = arch_.num_warps();
  if (num_warps > max_warps) {
    num_warps = max_warps;
  }

  const bool single_active = (active_warps_.count() == 1);
  if (num_warps < 2 && single_active) {
    return true;
  }

  wspawn_.valid = true;
  wspawn_.num_warps = num_warps;
  wspawn_.nextPC = nextPC;
  return false;
}
// Registers warp `wid` at barrier `bar_id`.
//
// Bit 31 of `bar_id` selects a global barrier; the low 31 bits are the
// barrier index. Returns true only when `count` < 2 (nothing to wait for);
// in every other case the warp's arrival is recorded and false is returned.
//  - local barrier: once `count` warps have arrived, the stalled bit of every
//    arrived warp is cleared and the barrier is reset.
//  - global barrier: once every active warp of this core has arrived, the
//    request is forwarded to the socket and the barrier is reset.
bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
  if (count < 2)
    return true;

  const bool is_global = (bar_id >> 31) != 0;
  const uint32_t bar_idx = bar_id & 0x7fffffff;

  auto& arrived = barriers_.at(bar_idx);
  arrived.set(wid);

  DP(3, "*** Suspend core #" << core_->id() << ", warp #" << wid << " at barrier #" << bar_idx);

  if (is_global) {
    // global barrier handling
    if (arrived.count() == active_warps_.count()) {
      core_->socket()->barrier(bar_idx, count, core_->id());
      arrived.reset();
    }
    return false;
  }

  // local barrier handling
  if (arrived.count() != (size_t)count)
    return false;

  // resume suspended warps
  for (uint32_t w = 0; w < arch_.num_warps(); ++w) {
    if (!arrived.test(w))
      continue;
    DP(3, "*** Resume core #" << core_->id() << ", warp #" << w << " at barrier #" << bar_idx);
    stalled_warps_.reset(w);
  }
  arrived.reset();
  return false;
}
// Reads `size` bytes of instruction memory at `addr` into `data`.
//
// With VM_ENABLE the access goes through the MMU as a FETCH access; a page
// fault is reported on std::cout and then rethrown to the caller. Without
// VM_ENABLE the read is issued with access flags 0.
#ifdef VM_ENABLE
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
  // std::hex is sticky on the stream: switch back to decimal after the trace
  // so later output is not silently printed in hex.
  DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size << std::dec);
  try
  {
    mmu_.read(data, addr, size, ACCESS_TYPE::FETCH);
  }
  catch (Page_Fault_Exception& page_fault)
  {
    std::cout<<page_fault.what()<<std::endl;
    throw;
  }
}
#else
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
  mmu_.read(data, addr, size, 0);
}
#endif
#ifdef VM_ENABLE
// Programs the SATP CSR with `satp` (thread 0, warp 0 are passed to set_csr).
void Emulator::set_satp(uint64_t satp) {
  // Restore decimal output after the hex value; std::hex is otherwise sticky.
  DPH(3, "set satp 0x" << std::hex << satp << std::dec << " in emulator module\n");
  set_csr(VX_CSR_SATP,satp,0,0);
}
#endif
// Reads `size` bytes of data memory at `addr` into `data`.
//
// Addresses classified as AddrType::Shared are served by the core's local
// memory; everything else goes through the MMU. With VM_ENABLE the MMU access
// is a LOAD access and a page fault is reported on std::cout, then rethrown.
// Both variants trace the access in the same format (address and data in hex,
// size in decimal) and leave the stream in decimal mode.
#ifdef VM_ENABLE
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
  DP(1, "*** dcache_read 0x" << std::hex << addr << ", size = 0x " << size << std::dec);
  auto type = get_addr_type(addr);
  if (type == AddrType::Shared) {
    core_->local_mem()->read(data, addr, size);
  } else {
    try
    {
      mmu_.read(data, addr, size, ACCESS_TYPE::LOAD);
    }
    catch (Page_Fault_Exception& page_fault)
    {
      std::cout<<page_fault.what()<<std::endl;
      throw;
    }
  }
  // std::dec after the data matches the non-VM variant and stops std::hex
  // from leaking into later output.
  DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#else
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
  auto type = get_addr_type(addr);
  if (type == AddrType::Shared) {
    core_->local_mem()->read(data, addr, size);
  } else {
    mmu_.read(data, addr, size, 0);
  }
  DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#endif
// Writes `size` bytes from `data` to data memory at `addr`.
//
// Writes inside [IO_COUT_ADDR, IO_COUT_ADDR + IO_COUT_SIZE) are console output
// and are handed to writeToStdOut(). Addresses classified as AddrType::Shared
// go to the core's local memory; everything else goes through the MMU. With
// VM_ENABLE the MMU access is a STORE access and a page fault is reported on
// std::cout, then rethrown. Both variants trace the access in the same format
// (address and data in hex, size in decimal) and leave the stream in decimal.
#ifdef VM_ENABLE
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
  DP(1, "*** dcache_write 0x" << std::hex << addr << ", size = 0x " << size << std::dec);
  auto type = get_addr_type(addr);
  if (addr >= uint64_t(IO_COUT_ADDR)
   && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
    this->writeToStdOut(data, addr, size);
  } else if (type == AddrType::Shared) {
    core_->local_mem()->write(data, addr, size);
  } else {
    try
    {
      mmu_.write(data, addr, size, ACCESS_TYPE::STORE);
    }
    catch (Page_Fault_Exception& page_fault)
    {
      std::cout<<page_fault.what()<<std::endl;
      throw;
    }
  }
  // std::dec after the data matches the non-VM variant and stops std::hex
  // from leaking into later output.
  DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#else
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
  auto type = get_addr_type(addr);
  if (addr >= uint64_t(IO_COUT_ADDR)
   && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
    this->writeToStdOut(data, addr, size);
  } else if (type == AddrType::Shared) {
    core_->local_mem()->write(data, addr, size);
  } else {
    mmu_.write(data, addr, size, 0);
  }
  DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#endif
// Places an atomic reservation on `addr` in the memory unit.
// Only addresses classified as AddrType::Global are reserved; any other
// address type is ignored.
void Emulator::dcache_amo_reserve(uint64_t addr) {
  if (get_addr_type(addr) != AddrType::Global)
    return;
  mmu_.amo_reserve(addr);
}
// Asks the memory unit whether an atomic reservation on `addr` is valid.
// Always false for addresses that are not classified as AddrType::Global.
bool Emulator::dcache_amo_check(uint64_t addr) {
  const bool is_global = (get_addr_type(addr) == AddrType::Global);
  return is_global && mmu_.amo_check(addr);
}
// Handles a one-byte write to the console I/O window.
//
// The offset of `addr` inside the window selects a per-id line buffer. The
// character is appended to it, and when the character is a newline the whole
// line is printed as "#<id>: <line>" and the buffer is emptied.
// Aborts the process when `size` is not exactly 1.
void Emulator::writeToStdOut(const void* data, uint64_t addr, uint32_t size) {
  if (size != 1)
    std::abort();

  const uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
  const char ch = *static_cast<const char*>(data);

  auto& line = print_bufs_[tid];
  line << ch;
  if (ch != '\n')
    return;

  std::cout << "#" << tid << ": " << line.str() << std::flush;
  line.str("");
}
// Prints every non-empty console line buffer as "#<id>: <text>" followed by a
// newline. The buffers themselves are left untouched.
void Emulator::cout_flush() {
  for (auto& entry : print_bufs_) {
    const auto pending = entry.second.str();
    if (pending.empty())
      continue;
    std::cout << "#" << entry.first << ": " << pending << std::endl;
  }
}
// CSR_READ_64(addr, value): expands to the `case` label(s) that return a
// 64-bit counter from inside a `switch` on the CSR address.
//  - XLEN_64 defined: a single case returning the full value.
//  - otherwise: one case at `addr` returning the low 32 bits and one at
//    `addr + (VX_CSR_MPM_BASE_H - VX_CSR_MPM_BASE)` returning the high 32 bits.
// Both arguments are parenthesized so that expression arguments (for example
// `a | b`) expand correctly under the cast and the shift.
#ifdef XLEN_64
#define CSR_READ_64(addr, value) \
  case (addr): return (value)
#else
#define CSR_READ_64(addr, value) \
  case (addr) : return (uint32_t)(value); \
  case ((addr) + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return (((value) >> 32) & 0xFFFFFFFF)
#endif
// Returns the current value of mat_size (the value last written through the
// VX_MAT_MUL_SIZE CSR in set_csr).
Word Emulator::get_tiles() {
return mat_size;
}
// Returns the current value of tc_size (the value last written through the
// VX_TC_SIZE CSR in set_csr).
Word Emulator::get_tc_size() {
return tc_size;
}
// Returns the current value of tc_num (the value last written through the
// VX_TC_NUM CSR in set_csr).
Word Emulator::get_tc_num() {
return tc_num;
}
// Reads the CSR at `addr` as seen by thread `tid` of warp `wid`.
//
// Returns 0 for the listed machine-mode CSRs (and for SATP when VM_ENABLE is
// not defined), and for an MPM address that matches no counter of the
// selected performance class. Aborts the process on an unknown CSR address or
// an unknown MPM class.
Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
// core performance counters, used by the MCYCLE/MINSTRET and MPM core reads
auto core_perf = core_->perf_stats();
switch (addr) {
case VX_CSR_SATP:
#ifdef VM_ENABLE
// return csrs_.at(wid).at(tid)[addr];
return mmu_.get_satp();
#endif
// without VM_ENABLE, SATP falls through to the read-as-zero group below
case VX_CSR_PMPCFG0:
case VX_CSR_PMPADDR0:
case VX_CSR_MSTATUS:
case VX_CSR_MISA:
case VX_CSR_MEDELEG:
case VX_CSR_MIDELEG:
case VX_CSR_MIE:
case VX_CSR_MTVEC:
case VX_CSR_MEPC:
case VX_CSR_MNSTATUS:
case VX_CSR_MCAUSE:
return 0;
// floating-point status: flags are fcsr bits [4:0], rounding mode is fcsr >> 5
case VX_CSR_FFLAGS: return warps_.at(wid).fcsr & 0x1F;
case VX_CSR_FRM: return (warps_.at(wid).fcsr >> 5);
case VX_CSR_FCSR: return warps_.at(wid).fcsr;
#ifdef EXT_V_ENABLE
// Vector CSRs (stored per warp and per thread in csrs_)
case VX_CSR_VSTART:
return csrs_.at(wid).at(tid)[VX_CSR_VSTART];
case VX_CSR_VXSAT:
return csrs_.at(wid).at(tid)[VX_CSR_VXSAT];
case VX_CSR_VXRM:
return csrs_.at(wid).at(tid)[VX_CSR_VXRM];
case VX_CSR_VCSR: {
// VCSR is composed from the stored VXRM (shifted left by 1) and VXSAT (bit 0)
Word vxsat = csrs_.at(wid).at(tid)[VX_CSR_VXSAT];
Word vxrm = csrs_.at(wid).at(tid)[VX_CSR_VXRM];
return (vxrm << 1) | vxsat;
}
case VX_CSR_VL:
return csrs_.at(wid).at(tid)[VX_CSR_VL];
case VX_CSR_VTYPE:
return csrs_.at(wid).at(tid)[VX_CSR_VTYPE];
case VX_CSR_VLENB:
return VLEN / 8;
case VX_CSR_VCYCLE:
return csrs_.at(wid).at(tid)[VX_CSR_VCYCLE];
case VX_CSR_VTIME:
return csrs_.at(wid).at(tid)[VX_CSR_VTIME];
case VX_CSR_VINSTRET:
return csrs_.at(wid).at(tid)[VX_CSR_VINSTRET];
#endif
// identification and configuration CSRs
// hart id = (core id * warps per core + wid) * threads per warp + tid
case VX_CSR_MHARTID: return (core_->id() * arch_.num_warps() + wid) * arch_.num_threads() + tid;
case VX_CSR_THREAD_ID: return tid;
case VX_CSR_WARP_ID: return wid;
case VX_CSR_CORE_ID: return core_->id();
case VX_CSR_ACTIVE_THREADS:return warps_.at(wid).tmask.to_ulong();
case VX_CSR_ACTIVE_WARPS:return active_warps_.to_ulong();
case VX_CSR_NUM_THREADS:return arch_.num_threads();
case VX_CSR_NUM_WARPS: return arch_.num_warps();
case VX_CSR_NUM_CORES: return uint32_t(arch_.num_cores()) * arch_.num_clusters();
case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base();
case VX_CSR_MSCRATCH: return csr_mscratch_;
case VX_MAT_MUL_SIZE: return mat_size;
case VX_TC_NUM: return tc_num;
case VX_TC_SIZE: return tc_size;
// 64-bit counters: CSR_READ_64 expands to the case label(s) and the return
CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles);
CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs);
default:
// MPM window: 32 addresses from VX_CSR_MPM_BASE plus 32 from VX_CSR_MPM_BASE_H
if ((addr >= VX_CSR_MPM_BASE && addr < (VX_CSR_MPM_BASE + 32))
|| (addr >= VX_CSR_MPM_BASE_H && addr < (VX_CSR_MPM_BASE_H + 32))) {
// user-defined MPM CSRs
// the meaning of an MPM address depends on the class selected in the DCRs
auto perf_class = dcrs_.base_dcrs.read(VX_DCR_BASE_MPM_CLASS);
switch (perf_class) {
case VX_DCR_MPM_CLASS_NONE:
break;
case VX_DCR_MPM_CLASS_CORE: {
switch (addr) {
CSR_READ_64(VX_CSR_MPM_SCHED_ID, core_perf.sched_idle);
CSR_READ_64(VX_CSR_MPM_SCHED_ST, core_perf.sched_stalls);
CSR_READ_64(VX_CSR_MPM_IBUF_ST, core_perf.ibuf_stalls);
CSR_READ_64(VX_CSR_MPM_SCRB_ST, core_perf.scrb_stalls);
CSR_READ_64(VX_CSR_MPM_OPDS_ST, core_perf.opds_stalls);
CSR_READ_64(VX_CSR_MPM_SCRB_ALU, core_perf.scrb_alu);
CSR_READ_64(VX_CSR_MPM_SCRB_FPU, core_perf.scrb_fpu);
CSR_READ_64(VX_CSR_MPM_SCRB_LSU, core_perf.scrb_lsu);
CSR_READ_64(VX_CSR_MPM_SCRB_SFU, core_perf.scrb_sfu);
CSR_READ_64(VX_CSR_MPM_SCRB_CSRS, core_perf.scrb_csrs);
CSR_READ_64(VX_CSR_MPM_SCRB_WCTL, core_perf.scrb_wctl);
CSR_READ_64(VX_CSR_MPM_IFETCHES, core_perf.ifetches);
CSR_READ_64(VX_CSR_MPM_LOADS, core_perf.loads);
CSR_READ_64(VX_CSR_MPM_STORES, core_perf.stores);
CSR_READ_64(VX_CSR_MPM_IFETCH_LT, core_perf.ifetch_latency);
CSR_READ_64(VX_CSR_MPM_LOAD_LT, core_perf.load_latency);
}
} break;
case VX_DCR_MPM_CLASS_MEM: {
// memory-hierarchy counters are gathered from processor, cluster, socket and local memory
auto proc_perf = core_->socket()->cluster()->processor()->perf_stats();
auto cluster_perf = core_->socket()->cluster()->perf_stats();
auto socket_perf = core_->socket()->perf_stats();
auto lmem_perf = core_->local_mem()->perf_stats();
// coalescer misses are summed over all LSU blocks
uint64_t coalescer_misses = 0;
for (uint i = 0; i < NUM_LSU_BLOCKS; ++i) {
coalescer_misses += core_->mem_coalescer(i)->perf_stats().misses;
}
switch (addr) {
CSR_READ_64(VX_CSR_MPM_ICACHE_READS, socket_perf.icache.reads);
CSR_READ_64(VX_CSR_MPM_ICACHE_MISS_R, socket_perf.icache.read_misses);
CSR_READ_64(VX_CSR_MPM_ICACHE_MSHR_ST, socket_perf.icache.mshr_stalls);
CSR_READ_64(VX_CSR_MPM_DCACHE_READS, socket_perf.dcache.reads);
CSR_READ_64(VX_CSR_MPM_DCACHE_WRITES, socket_perf.dcache.writes);
CSR_READ_64(VX_CSR_MPM_DCACHE_MISS_R, socket_perf.dcache.read_misses);
CSR_READ_64(VX_CSR_MPM_DCACHE_MISS_W, socket_perf.dcache.write_misses);
CSR_READ_64(VX_CSR_MPM_DCACHE_BANK_ST, socket_perf.dcache.bank_stalls);
CSR_READ_64(VX_CSR_MPM_DCACHE_MSHR_ST, socket_perf.dcache.mshr_stalls);
CSR_READ_64(VX_CSR_MPM_L2CACHE_READS, cluster_perf.l2cache.reads);
CSR_READ_64(VX_CSR_MPM_L2CACHE_WRITES, cluster_perf.l2cache.writes);
CSR_READ_64(VX_CSR_MPM_L2CACHE_MISS_R, cluster_perf.l2cache.read_misses);
CSR_READ_64(VX_CSR_MPM_L2CACHE_MISS_W, cluster_perf.l2cache.write_misses);
CSR_READ_64(VX_CSR_MPM_L2CACHE_BANK_ST, cluster_perf.l2cache.bank_stalls);
CSR_READ_64(VX_CSR_MPM_L2CACHE_MSHR_ST, cluster_perf.l2cache.mshr_stalls);
CSR_READ_64(VX_CSR_MPM_L3CACHE_READS, proc_perf.l3cache.reads);
CSR_READ_64(VX_CSR_MPM_L3CACHE_WRITES, proc_perf.l3cache.writes);
CSR_READ_64(VX_CSR_MPM_L3CACHE_MISS_R, proc_perf.l3cache.read_misses);
CSR_READ_64(VX_CSR_MPM_L3CACHE_MISS_W, proc_perf.l3cache.write_misses);
CSR_READ_64(VX_CSR_MPM_L3CACHE_BANK_ST, proc_perf.l3cache.bank_stalls);
CSR_READ_64(VX_CSR_MPM_L3CACHE_MSHR_ST, proc_perf.l3cache.mshr_stalls);
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_ST, proc_perf.memsim.bank_stalls);
CSR_READ_64(VX_CSR_MPM_COALESCER_MISS, coalescer_misses);
CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
CSR_READ_64(VX_CSR_MPM_LMEM_BANK_ST, lmem_perf.bank_stalls);
}
} break;
#ifdef EXT_V_ENABLE
case VX_DCR_MPM_CLASS_VEC: {
VecUnit::PerfStats vec_perf_stats;
vec_perf_stats += vec_unit_->perf_stats();
switch (addr) {
CSR_READ_64(VX_CSR_MPM_VEC_READS, vec_perf_stats.reads);
CSR_READ_64(VX_CSR_MPM_VEC_WRITES, vec_perf_stats.writes);
CSR_READ_64(VX_CSR_MPM_VEC_LAT, vec_perf_stats.latency);
CSR_READ_64(VX_CSR_MPM_VEC_ST, vec_perf_stats.stalls);
}
} break;
#endif
default: {
std::cout << "Error: invalid MPM CLASS: value=" << perf_class << std::endl;
std::abort();
} break;
}
} else {
std::cout << "Error: invalid CSR read addr=0x"<< std::hex << addr << std::dec << std::endl;
std::abort();
}
}
// reached for an MPM address with no matching counter in the selected class
return 0;
}
// Writes `value` to the CSR at `addr` for thread `tid` of warp `wid`.
//
// Floating-point status lives in the warp's fcsr: flags in bits [4:0], the
// rounding mode in bits [7:5]. Vector CSRs (EXT_V_ENABLE) are stored per warp
// and per thread. SATP is forwarded to the MMU when VM_ENABLE is defined and
// ignored otherwise. The listed machine-mode CSRs and VLENB accept writes but
// ignore them. Aborts the process on any other address.
void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
  __unused (tid);
  switch (addr) {
  case VX_CSR_FFLAGS:
    warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F);
    break;
  case VX_CSR_FRM:
    // The rounding mode is a 3-bit field: mask before shifting so that stray
    // upper bits of `value` cannot land above bit 7 of fcsr.
    warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0xE0) | ((value & 0x7) << 5);
    break;
  case VX_CSR_FCSR:
    warps_.at(wid).fcsr = value & 0xff;
    break;
  case VX_CSR_MSCRATCH:
    csr_mscratch_ = value;
    break;
#ifdef EXT_V_ENABLE
  // Vector CSRs
  case VX_CSR_VSTART:
    csrs_.at(wid).at(tid)[VX_CSR_VSTART] = value;
    break;
  case VX_CSR_VXSAT:
    csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1;
    break;
  case VX_CSR_VXRM:
    csrs_.at(wid).at(tid)[VX_CSR_VXRM] = value & 0b11;
    break;
  case VX_CSR_VCSR:
    // VCSR packs VXSAT in bit 0 and VXRM in bits [2:1]
    csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1;
    csrs_.at(wid).at(tid)[VX_CSR_VXRM] = (value >> 1) & 0b11;
    break;
  case VX_CSR_VL: // read only, written by vset(i)vl(i)
    csrs_.at(wid).at(tid)[VX_CSR_VL] = value;
    break;
  case VX_CSR_VTYPE: // read only, written by vset(i)vl(i)
    csrs_.at(wid).at(tid)[VX_CSR_VTYPE] = value;
    break;
  case VX_CSR_VLENB: // read only, set to VLEN / 8
    // Explicit break: without it a VLENB write would fall through into the
    // SATP case below and reprogram the MMU when VM_ENABLE is also defined.
    break;
#endif
  case VX_CSR_SATP:
#ifdef VM_ENABLE
    mmu_.set_satp(value);
    break;
#endif
  // without VM_ENABLE, SATP joins the ignored machine-mode CSRs below
  case VX_CSR_MSTATUS:
  case VX_CSR_MEDELEG:
  case VX_CSR_MIDELEG:
  case VX_CSR_MIE:
  case VX_CSR_MTVEC:
  case VX_CSR_MEPC:
  case VX_CSR_PMPCFG0:
  case VX_CSR_PMPADDR0:
  case VX_CSR_MNSTATUS:
  case VX_CSR_MCAUSE:
    break;
  case VX_MAT_MUL_SIZE:
    mat_size = value;
    break;
  case VX_TC_NUM:
    tc_num = value;
    break;
  case VX_TC_SIZE:
    tc_size = value;
    break;
  default: {
      std::cout << "Error: invalid CSR write addr=0x" << std::hex << addr << ", value=0x" << value << std::dec << std::endl;
      std::abort();
    }
  }
}
// Resolves the rounding mode of a floating-point instruction.
// A `func3` of 0x7 selects the mode held in the FRM CSR of the given
// thread/warp; any other value is used as the rounding mode directly.
uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) {
  if (func3 != 0x7)
    return func3;
  return this->get_csr(VX_CSR_FRM, tid, wid);
}
// ORs the exception flags `fflags` into the FCSR and then into the FFLAGS CSR
// of the given thread/warp. Does nothing when `fflags` is zero.
void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) {
  if (fflags == 0)
    return;

  const auto fcsr_now = this->get_csr(VX_CSR_FCSR, tid, wid);
  this->set_csr(VX_CSR_FCSR, fcsr_now | fflags, tid, wid);

  // read back after the FCSR write, as the flags share storage with it
  const auto flags_now = this->get_csr(VX_CSR_FFLAGS, tid, wid);
  this->set_csr(VX_CSR_FFLAGS, flags_now | fflags, tid, wid);
}
// For riscv-vector test functionality, ecall and ebreak must trap
// These instructions are used in the vector tests to stop execution of the test
// Therefore, without these instructions, undefined and incorrect behavior happens
//
// For now, we need these instructions to trap for testing the riscv-vector isa
// Handles ecall by deactivating every warp, which makes running() return
// false.
void Emulator::trigger_ecall() {
active_warps_.reset();
}
// Handles ebreak by deactivating every warp, which makes running() return
// false.
void Emulator::trigger_ebreak() {
active_warps_.reset();
}