simX refactoring

This commit is contained in:
Blaise Tine 2021-02-27 02:27:19 -08:00
parent 4fe345f269
commit a8452483fe
20 changed files with 1198 additions and 1789 deletions

View file

@ -185,7 +185,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES;
*value = NUM_CORES * NUM_CLUSTERS;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;

View file

@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CXXFLAGS += $(CONFIGS)
@ -21,7 +21,7 @@ LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
# Debugging
ifdef DEBUG

View file

@ -144,19 +144,18 @@ private:
void run() {
vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS);
vortex::Decoder decoder(arch);
vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
vortex::MemoryUnit mu(PAGE_SIZE, arch.wsize(), true);
mu.attach(ram_, 0);
std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES);
for (size_t i = 0; i < NUM_CORES; ++i) {
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu);
std::vector<std::shared_ptr<vortex::Core>> cores(arch.num_cores());
for (int i = 0; i < arch.num_cores(); ++i) {
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu, i);
}
bool running;
do {
running = false;
for (size_t i = 0; i < NUM_CORES; ++i) {
for (int i = 0; i < arch.num_cores(); ++i) {
if (!cores[i]->running())
continue;
running = true;
@ -236,7 +235,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES;
*value = NUM_CORES * NUM_CLUSTERS;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;

View file

@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl
PROJECT = simX
SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp main.cpp
SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
# Debugging
ifdef DEBUG

View file

@ -11,156 +11,56 @@ namespace vortex {
class ArchDef {
public:
struct Undefined {};
ArchDef(const std::string &s,
ArchDef(const std::string &/*arch*/,
int num_cores,
int num_warps,
int num_threads) {
std::istringstream iss(s.c_str());
wordSize_ = 4;
encChar_ = 'w';
numRegs_ = 32;
numPRegs_ = 0;
numCores_ = num_cores;
numWarps_ = num_warps;
numThreads_ = num_threads;
extent_ = EXT_END;
int num_threads) {
wsize_ = 4;
vsize_ = 16;
num_regs_ = 32;
num_csrs_ = 4096;
num_cores_ = num_cores;
num_warps_ = num_warps;
num_threads_ = num_threads;
}
operator std::string () const {
if (extent_ == EXT_NULL)
return "";
std::ostringstream oss;
if (extent_ >= EXT_WORDSIZE) oss << wordSize_;
if (extent_ >= EXT_ENC ) oss << encChar_;
if (extent_ >= EXT_REGS ) oss << numRegs_;
if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_;
if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_;
if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_;
if (extent_ >= EXT_CORES ) oss << '/' << numCores_;
return oss.str();
int wsize() const {
return wsize_;
}
bool operator==(const ArchDef &r) const {
Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_);
// Can't be equal if we can't specify a binary encoding at all.
if (minExtent < EXT_PREGS)
return false;
if (minExtent >= EXT_WORDSIZE) {
if (wordSize_!=r.wordSize_)
return false;
}
if (minExtent >= EXT_ENC) {
if (encChar_ != r.encChar_)
return false;
}
if (minExtent >= EXT_REGS) {
if (numRegs_ != r.numRegs_)
return false;
}
if (minExtent >= EXT_PREGS) {
if (numPRegs_ != r.numPRegs_)
return false;
}
if (minExtent >= EXT_THREADS) {
if (numThreads_ != r.numThreads_)
return false;
}
if (minExtent >= EXT_WARPS) {
if (numWarps_ != r.numWarps_)
return false;
}
if (minExtent >= EXT_CORES) {
if (numCores_ != r.numCores_)
return false;
}
return true;
int vsize() const {
return vsize_;
}
bool operator!=(const ArchDef &r) const {
return !(*this == r);
int num_regs() const {
return num_regs_;
}
Size getWordSize() const {
if (extent_ < EXT_WORDSIZE)
throw Undefined();
return wordSize_;
int num_csrs() const {
return num_csrs_;
}
char getEncChar() const {
if ((extent_ < EXT_ENC) || (encChar_ == 'x'))
throw Undefined();
return encChar_;
int num_threads() const {
return num_threads_;
}
RegNum getNumRegs() const {
if (extent_ < EXT_REGS)
throw Undefined();
return numRegs_;
int num_warps() const {
return num_warps_;
}
RegNum getNumPRegs() const {
if (extent_ < EXT_PREGS)
throw Undefined();
return numPRegs_;
}
ThdNum getNumThreads() const {
if (extent_ < EXT_THREADS)
throw Undefined();
return numThreads_;
}
ThdNum getNumWarps() const {
if (extent_ < EXT_WARPS)
throw Undefined();
return numWarps_;
}
ThdNum getNumCores() const {
if (extent_ < EXT_CORES)
throw Undefined();
return numCores_;
}
bool is_cpu_mode() const {
return cpu_mode_;
int num_cores() const {
return num_cores_;
}
private:
enum Extent {
EXT_NULL,
EXT_WORDSIZE,
EXT_ENC,
EXT_REGS,
EXT_PREGS,
EXT_THREADS,
EXT_WARPS,
EXT_CORES,
EXT_END
};
Extent extent_;
Size wordSize_;
ThdNum numThreads_;
ThdNum numWarps_;
ThdNum numCores_;
RegNum numRegs_;
ThdNum numPRegs_;
char encChar_;
bool cpu_mode_;
int wsize_;
int vsize_;
int num_regs_;
int num_csrs_;
int num_threads_;
int num_warps_;
int num_cores_;
};
}

View file

@ -1,10 +1,7 @@
#include <iostream>
#include <iomanip>
#include <string.h>
// #define USE_DEBUG 7
// #define PRINT_ACTIVE_THREADS
#include <assert.h>
#include "types.h"
#include "util.h"
#include "archdef.h"
@ -14,21 +11,25 @@
#include "debug.h"
#define INIT_TRACE(trace_inst) \
trace_inst.valid_inst = false; \
trace_inst.pc = 0; \
trace_inst.valid = false; \
trace_inst.PC = 0; \
trace_inst.wid = schedule_w_; \
trace_inst.rs1 = -1; \
trace_inst.rs2 = -1; \
trace_inst.rd = -1; \
trace_inst.vs1 = -1; \
trace_inst.vs2 = -1; \
trace_inst.vd = -1; \
trace_inst.irs1 = -1; \
trace_inst.irs2 = -1; \
trace_inst.frs1 = -1; \
trace_inst.frs2 = -1; \
trace_inst.frs3 = -1; \
trace_inst.frd = -1; \
trace_inst.ird = -1; \
trace_inst.vrs1 = -1; \
trace_inst.vrs2 = -1; \
trace_inst.vrd = -1; \
trace_inst.is_lw = false; \
trace_inst.is_sw = false; \
if (trace_inst.mem_addresses != NULL) \
free(trace_inst.mem_addresses); \
trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++) \
for (int tid = 0; tid < arch_.num_threads(); tid++) \
trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \
trace_inst.fetch_stall_cycles = 0; \
@ -37,18 +38,22 @@
trace_inst.stalled = false;
#define CPY_TRACE(drain, source) \
drain.valid_inst = source.valid_inst; \
drain.pc = source.pc; \
drain.valid = source.valid; \
drain.PC = source.PC; \
drain.wid = source.wid; \
drain.rs1 = source.rs1; \
drain.rs2 = source.rs2; \
drain.rd = source.rd; \
drain.vs1 = source.vs1; \
drain.vs2 = source.vs2; \
drain.vd = source.vd; \
drain.irs1 = source.irs1; \
drain.irs2 = source.irs2; \
drain.ird = source.ird; \
drain.frs1 = source.frs1; \
drain.frs2 = source.frs2; \
drain.frs3 = source.frs3; \
drain.frd = source.frd; \
drain.vrs1 = source.vrs1; \
drain.vrs2 = source.vrs2; \
drain.vrd = source.vrd; \
drain.is_lw = source.is_lw; \
drain.is_sw = source.is_sw; \
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++)\
for (int tid = 0; tid < arch_.num_threads(); tid++) \
drain.mem_addresses[tid] = source.mem_addresses[tid]; \
drain.mem_stall_cycles = source.mem_stall_cycles; \
drain.fetch_stall_cycles = source.fetch_stall_cycles; \
@ -60,17 +65,17 @@ using namespace vortex;
void printTrace(trace_inst_t *trace, const char *stage_name) {
__unused(trace, stage_name);
D(3, stage_name << ": valid=" << trace->valid_inst);
D(3, stage_name << ": PC=" << std::hex << trace->pc << std::dec);
D(3, stage_name << ": wid=" << trace->wid);
D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2);
D(3, stage_name << ": is_lw=" << trace->is_lw);
D(3, stage_name << ": is_sw=" << trace->is_sw);
D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
D(3, stage_name << ": stall_warp=" << trace->stall_warp);
D(3, stage_name << ": wspawn=" << trace->wspawn);
D(3, stage_name << ": stalled=" << trace->stalled);
D(4, stage_name << ": valid=" << trace->valid);
D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec);
D(4, stage_name << ": wid=" << trace->wid);
D(4, stage_name << ": rd=" << trace->ird << ", rs1=" << trace->irs1 << ", trs2=" << trace->irs2);
D(4, stage_name << ": is_lw=" << trace->is_lw);
D(4, stage_name << ": is_sw=" << trace->is_sw);
D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
D(4, stage_name << ": stall_warp=" << trace->stall_warp);
D(4, stage_name << ": wspawn=" << trace->wspawn);
D(4, stage_name << ": stalled=" << trace->stalled);
}
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
@ -79,8 +84,7 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
, decoder_(decoder)
, mem_(mem)
, steps_(0)
, num_instructions_(0) {
release_warp_ = false;
, num_insts_(0) {
foundSchedule_ = true;
schedule_w_ = 0;
@ -98,23 +102,17 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
INIT_TRACE(inst_in_lsu_);
INIT_TRACE(inst_in_wb_);
for (int i = 0; i < 32; i++) {
stalled_warps_[i] = false;
for (int j = 0; j < 32; j++) {
renameTable_[i][j] = true;
}
iRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
fRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
vRenameTable_.resize(arch.num_regs(), false);
stalled_warps_.resize(arch.num_warps(), false);
for (int i = 0; i < arch_.num_warps(); ++i) {
warps_.emplace_back(this, i);
}
for (int i = 0; i < 32; i++) {
vecRenameTable_[i] = true;
}
for (unsigned i = 0; i < arch_.getNumWarps(); ++i) {
warps_.push_back(Warp(this, i));
}
warps_[0].setActiveThreads(1);
warps_[0].setSpawned(true);
warps_[0].setTmask(0, true);
}
Core::~Core() {
@ -125,32 +123,20 @@ void Core::step() {
D(3, "###########################################################");
steps_++;
D(3, "cycle: " << steps_);
D(3, std::dec << "Core" << id_ << ": cycle: " << steps_);
DPH(3, "stalled warps:");
for (ThdNum widd = 0; widd < arch_.getNumWarps(); widd++) {
DPN(3, " " << stalled_warps_[widd]);
for (int i = 0; i < arch_.num_warps(); i++) {
DPN(3, " " << stalled_warps_[i]);
}
DPN(3, "\n");
// cout << "About to call writeback" << std::endl;
this->writeback();
// cout << "About to call load_store" << std::endl;
this->load_store();
// cout << "About to call execute_unit" << std::endl;
this->execute_unit();
// cout << "About to call scheduler" << std::endl;
this->scheduler();
// cout << "About to call decode" << std::endl;
this->decode();
// D(3, "About to call fetch" << std::flush);
this->fetch();
// D(3, "Finished fetch" << std::flush);
if (release_warp_) {
release_warp_ = false;
stalled_warps_[release_warp_num_] = false;
}
DPN(3, std::flush);
}
@ -161,10 +147,8 @@ void Core::warpScheduler() {
for (size_t wid = 0; wid < warps_.size(); ++wid) {
// round robin scheduling
next_warp = (next_warp + 1) % warps_.size();
bool has_active_threads = (warps_[next_warp].getActiveThreads() > 0);
bool has_active_threads = warps_[next_warp].active();
bool stalled = stalled_warps_[next_warp];
if (has_active_threads && !stalled) {
foundSchedule_ = true;
break;
@ -174,35 +158,28 @@ void Core::warpScheduler() {
}
void Core::fetch() {
// D(-1, "Found schedule: " << foundSchedule_);
if ((!inst_in_scheduler_.stalled)
&& (inst_in_fetch_.fetch_stall_cycles == 0)) {
// CPY_TRACE(inst_in_decode_, inst_in_fetch_);
// if (warps_[schedule_w_].activeThreads)
{
INIT_TRACE(inst_in_fetch_);
INIT_TRACE(inst_in_fetch_);
if (foundSchedule_) {
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
if (foundSchedule_) {
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads();
num_instructions_ = num_instructions_ + warps_[schedule_w_].getActiveThreads();
warps_[schedule_w_].step(&inst_in_fetch_);
warps_[schedule_w_].step(&inst_in_fetch_);
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
if (active_threads_b != active_threads_a) {
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
}
this->getCacheDelays(&inst_in_fetch_);
if (inst_in_fetch_.stall_warp) {
stalled_warps_[inst_in_fetch_.wid] = true;
}
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
if (active_threads_b != active_threads_a) {
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
}
this->getCacheDelays(&inst_in_fetch_);
if (inst_in_fetch_.stall_warp) {
stalled_warps_[inst_in_fetch_.wid] = true;
}
this->warpScheduler();
}
this->warpScheduler();
} else {
inst_in_fetch_.stalled = false;
if (inst_in_fetch_.fetch_stall_cycles > 0)
@ -218,7 +195,6 @@ void Core::decode() {
CPY_TRACE(inst_in_decode_, inst_in_fetch_);
INIT_TRACE(inst_in_fetch_);
}
//printTrace(&inst_in_decode_, "Decode");
}
void Core::scheduler() {
@ -226,136 +202,162 @@ void Core::scheduler() {
CPY_TRACE(inst_in_scheduler_, inst_in_decode_);
INIT_TRACE(inst_in_decode_);
}
//printTrace(&inst_in_scheduler_, "Scheduler");
}
void Core::load_store() {
if ((inst_in_lsu_.mem_stall_cycles > 0) || (inst_in_lsu_.stalled)) {
if ((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) {
// LSU currently busy
if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) {
inst_in_scheduler_.stalled = true;
}
} else {
// LSU not busy
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
// Scheduler has LSU inst
bool scheduler_srcs_ready = true;
if (inst_in_scheduler_.rs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
}
if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw)
return;
if (inst_in_scheduler_.rs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
}
// Scheduler has LSU inst
bool scheduler_srcs_busy = false;
if (inst_in_scheduler_.vs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
}
if (inst_in_scheduler_.vs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
}
if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
}
if (scheduler_srcs_ready) {
if (inst_in_scheduler_.rd != -1)
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
if (inst_in_scheduler_.rd != -1)
vecRenameTable_[inst_in_scheduler_.vd] = false;
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
} else {
inst_in_scheduler_.stalled = true;
// INIT_TRACE(inst_in_lsu_);
}
} else {
// INIT_TRACE(inst_in_lsu_);
if (inst_in_scheduler_.irs2 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
}
if (inst_in_scheduler_.frs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
}
if (inst_in_scheduler_.frs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
}
if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0)
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
if (inst_in_scheduler_.frd >= 0)
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
if (inst_in_scheduler_.vrd >= 0)
vRenameTable_[inst_in_scheduler_.vrd] = true;
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
}
}
if (inst_in_lsu_.mem_stall_cycles > 0)
inst_in_lsu_.mem_stall_cycles--;
//printTrace(&inst_in_lsu_, "LSU");
}
void Core::execute_unit() {
// EXEC is always not busy
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
// Not an execute instruction
// INIT_TRACE(inst_in_exe_);
} else {
bool scheduler_srcs_ready = true;
if (inst_in_scheduler_.rs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
// cout << "Rename RS1: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1] << " wid: " << inst_in_scheduler_.wid << '\n';
}
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)
return;
bool scheduler_srcs_busy = false;
if (inst_in_scheduler_.rs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
// cout << "Rename RS2: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2] << " wid: " << inst_in_scheduler_.wid << '\n';
}
// cout << "About to check vs*\n" << std::flush;
if (inst_in_scheduler_.vs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
}
if (inst_in_scheduler_.vs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
}
// cout << "Finished sources\n" << std::flush;
if (scheduler_srcs_ready) {
if (inst_in_scheduler_.rd != -1) {
// cout << "rename setting rd: " << inst_in_scheduler_.rd << " to not useabel wid: " << inst_in_scheduler_.wid << '\n';
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
}
// cout << "About to check vector wb: " << inst_in_scheduler_.vd << "\n" << std::flush;
if (inst_in_scheduler_.vd != -1) {
vecRenameTable_[inst_in_scheduler_.vd] = false;
}
// cout << "Finished wb checking" << "\n" << std::flush;
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
// cout << "Finished trace copying and clearning" << "\n" << std::flush;
} else {
D(3, "Execute: srcs not ready!");
inst_in_scheduler_.stalled = true;
// INIT_TRACE(inst_in_exe_);
}
if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
}
//printTrace(&inst_in_exe_, "EXE");
// INIT_TRACE(inst_in_exe_);
if (inst_in_scheduler_.irs2 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
}
if (inst_in_scheduler_.frs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
}
if (inst_in_scheduler_.frs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
}
if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
D(3, "Execute: srcs not ready!");
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0) {
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
}
if (inst_in_scheduler_.frd >= 0) {
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
}
if (inst_in_scheduler_.vrd >= 0) {
vRenameTable_[inst_in_scheduler_.vrd] = true;
}
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
}
}
void Core::writeback() {
if (inst_in_wb_.rd > 0)
renameTable_[inst_in_wb_.wid][inst_in_wb_.rd] = true;
if (inst_in_wb_.vd > 0)
vecRenameTable_[inst_in_wb_.vd] = true;
if (inst_in_wb_.ird > 0) {
iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false;
}
if (inst_in_wb_.frd >= 0) {
fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false;
}
if (inst_in_wb_.vrd >= 0) {
vRenameTable_[inst_in_wb_.vrd] = false;
}
if (inst_in_wb_.stall_warp) {
stalled_warps_[inst_in_wb_.wid] = false;
// release_warp_ = true;
// release_warp_num_ = inst_in_wb_.wid;
}
INIT_TRACE(inst_in_wb_);
bool serviced_exe = false;
if ((inst_in_exe_.rd > 0) || (inst_in_exe_.stall_warp)) {
if ((inst_in_exe_.ird > 0)
|| (inst_in_exe_.frd >= 0)
|| (inst_in_exe_.vrd >= 0)
|| (inst_in_exe_.stall_warp)) {
CPY_TRACE(inst_in_wb_, inst_in_exe_);
INIT_TRACE(inst_in_exe_);
serviced_exe = true;
// cout << "WRITEBACK SERVICED EXE\n";
}
if (inst_in_lsu_.is_sw) {
INIT_TRACE(inst_in_lsu_);
} else {
if (((inst_in_lsu_.rd > 0) || (inst_in_lsu_.vd > 0)) && (inst_in_lsu_.mem_stall_cycles == 0)) {
if (((inst_in_lsu_.ird > 0)
|| (inst_in_lsu_.frd >= 0)
|| (inst_in_lsu_.vrd >= 0))
&& (inst_in_lsu_.mem_stall_cycles == 0)) {
if (serviced_exe) {
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
// Stalling LSU because EXE is busy
inst_in_lsu_.stalled = true;
} else {
CPY_TRACE(inst_in_wb_, inst_in_lsu_);
@ -366,27 +368,28 @@ void Core::writeback() {
}
void Core::getCacheDelays(trace_inst_t *trace_inst) {
trace_inst->fetch_stall_cycles += 3;
trace_inst->fetch_stall_cycles += 1;
if (trace_inst->is_sw || trace_inst->is_lw) {
trace_inst->mem_stall_cycles += 5;
trace_inst->mem_stall_cycles += 3;
}
}
bool Core::running() const {
bool stages_have_valid = inst_in_fetch_.valid_inst
|| inst_in_decode_.valid_inst
|| inst_in_scheduler_.valid_inst
|| inst_in_lsu_.valid_inst
|| inst_in_exe_.valid_inst
|| inst_in_wb_.valid_inst;
bool stages_have_valid = inst_in_fetch_.valid
|| inst_in_decode_.valid
|| inst_in_scheduler_.valid
|| inst_in_lsu_.valid
|| inst_in_exe_.valid
|| inst_in_wb_.valid;
if (stages_have_valid)
return true;
for (unsigned i = 0; i < warps_.size(); ++i)
if (warps_[i].running()) {
for (unsigned i = 0; i < warps_.size(); ++i) {
if (warps_[i].active()) {
return true;
}
}
return false;
}

View file

@ -60,8 +60,8 @@ public:
return interruptEntry_;
}
unsigned long num_instructions() const {
return num_instructions_;
unsigned long num_insts() const {
return num_insts_;
}
unsigned long num_steps() const {
@ -70,9 +70,10 @@ public:
private:
bool renameTable_[32][32];
bool vecRenameTable_[32];
bool stalled_warps_[32];
std::vector<std::vector<bool>> iRenameTable_;
std::vector<std::vector<bool>> fRenameTable_;
std::vector<bool> vRenameTable_;
std::vector<bool> stalled_warps_;
bool foundSchedule_;
Word id_;
@ -83,10 +84,8 @@ private:
std::unordered_map<Word, std::set<Warp *>> barriers_;
int schedule_w_;
uint64_t steps_;
uint64_t num_instructions_;
uint64_t num_insts_;
Word interruptEntry_;
bool release_warp_;
int release_warp_num_;
trace_inst_t inst_in_fetch_;
trace_inst_t inst_in_decode_;

View file

@ -1,6 +1,8 @@
#pragma once
//#define USE_DEBUG 9
#define USE_DEBUG 3
#define DEBUG_HEADER << "DEBUG "
//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": "
#ifdef USE_DEBUG
@ -11,13 +13,13 @@
#define D(lvl, x) do { \
if ((lvl) <= USE_DEBUG) { \
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl; \
std::cout DEBUG_HEADER << x << std::endl; \
} \
} while(0)
#define DPH(lvl, x) do { \
if ((lvl) <= USE_DEBUG) { \
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x; \
std::cout DEBUG_HEADER << x; \
} \
} while(0)
@ -27,10 +29,6 @@
} \
} while(0)
#define D_RAW(x) do { \
std::cout << x; \
} while (0)
#else
#define DX(x)

View file

@ -54,7 +54,7 @@ std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
}
Decoder::Decoder(const ArchDef &arch) {
inst_s_ = arch.getWordSize() * 8;
inst_s_ = arch.wsize() * 8;
opcode_s_ = 7;
reg_s_ = 5;
func2_s_ = 2;
@ -94,7 +94,11 @@ Decoder::Decoder(const ArchDef &arch) {
v_imm_mask_ = 0x7ff;
}
std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, trace_inst_t *trace_inst) {
std::shared_ptr<Instr> Decoder::decode(
const std::vector<Byte> &v,
Size &idx,
trace_inst_t *trace_inst)
{
Word code(readWord(v, idx, inst_s_ / 8));
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
@ -107,12 +111,13 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
InstType curInstType = sc_instTable.at(op).iType; // get current inst type
if (op == Opcode::FL || op == Opcode::FS) { // need to find out whether it is vector or floating point inst
InstType curInstType = sc_instTable.at(op).iType;
if (op == Opcode::FL || op == Opcode::FS) {
// need to find out whether it is vector or floating point inst
Word width_bits = (code >> shift_func3_) & func3_mask_;
if ((width_bits == 0x1) || (width_bits == 0x2)
|| (width_bits == 0x3) || (width_bits == 0x4)) {
curInstType = (op == Opcode::FL)? InstType::I_TYPE : InstType::S_TYPE;
curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE;
}
}
@ -122,52 +127,50 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
break;
case InstType::R_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
if (op == Opcode::FCI) {
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_);
instr->setFunc7((code >> shift_func7_) & func7_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break;
case InstType::I_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
if (op == Opcode::FCI || op == Opcode::FL) {
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
}
instr->setFunc7((code >> shift_func7_) & func7_mask_);
func3 = (code >> shift_func3_) & func3_mask_;
instr->setFunc3(func3);
if ((func3 == 5) && (op != L_INST) && (op != FL)) {
// std::cout << "func7: " << func7 << "\n";
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
} else {
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
}
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break;
case InstType::S_TYPE:
// std::cout << "************STORE\n";
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
if (op == Opcode::FS) {
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
} else {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_);
dest_bits = (code >> shift_rd_) & reg_mask_;
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
imeed = (imm_bits << reg_s_) | dest_bits;
// std::cout << "ENC: store imeed: " << imeed << "\n";
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
break;
case InstType::B_TYPE:
@ -184,51 +187,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
bit_12 = imm_bits >> 6;
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
break;
case InstType::U_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break;
case InstType::J_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
// [20 | 10:1 | 11 | 19:12]
unordered = code >> shift_j_u_immed_;
bits_19_12 = unordered & 0xff;
bit_11 = (unordered >> 8) & 0x1;
bits_10_1 = (unordered >> 9) & 0x3ff;
bit_20 = (unordered >> 19) & 0x1;
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
if (bit_20) {
imeed |= ~j_imm_mask_;
}
instr->setSrcImm(imeed);
trace_inst->valid_inst = true;
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break;
case InstType::V_TYPE:
D(3, "Entered here: instr type = vector" << op);
switch (op) {
case Opcode::VSET_ARITH: //TODO: arithmetic ops
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
func3 = (code >> shift_func3_) & func3_mask_;
instr->setFunc3(func3);
D(3, "Entered here: instr type = vector");
@ -247,53 +233,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
instr->setVsew((immed >> 2) & 0x3);
D(3, "sew " << ((immed >> 2) & 0x3));
} else {
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
}
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
} else {
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_) & 0x1);
instr->setFunc6((code >> shift_func6_) & func6_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
}
break;
case Opcode::VL:
D(3, "vector load instr");
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_));
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->vd = ((code >> shift_rd_) & reg_mask_);
//trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_);
break;
case Opcode::VS:
instr->setVs3((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_));
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
//trace_inst->vd = ((code>>shift_rd_) & reg_mask_);
trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3
break;
default:
@ -303,23 +270,47 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
break;
case R4_TYPE:
// RT: add R4_TYPE decoder
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcReg((code >> shift_rs3_) & reg_mask_);
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs3_) & reg_mask_);
instr->setFunc3((code >> shift_func3_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rs3 = ((code >> shift_rs3_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break;
default:
std::cout << "Unrecognized argument class in word decoder.\n";
std::abort();
}
if (curInstType != InstType::N_TYPE) {
trace_inst->valid = true;
if (instr->hasRDest()) {
if (instr->is_FpDest()) {
trace_inst->frd = instr->getRDest();
} else if (instr->is_VDest()) {
trace_inst->vrd = instr->getRDest();
} else {
trace_inst->ird = instr->getRDest();
}
}
for (int i = 0; i < instr->getNRSrc(); ++i) {
if (instr->is_FpSrc(i)) {
if (i == 0) trace_inst->frs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->frs2 = instr->getRSrc(i);
else if (i == 2) trace_inst->frs3 = instr->getRSrc(i);
else std::abort();
} else if (instr->is_VSrc(i)) {
if (i == 0) trace_inst->vrs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i);
else std::abort();
} else {
if (i == 0) trace_inst->irs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->irs2 = instr->getRSrc(i);
else std::abort();
}
}
}
D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush);
return instr;

File diff suppressed because it is too large Load diff

View file

@ -1,23 +0,0 @@
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "instr.h"
using namespace vortex;
// Stores 2^lmul in vlmul_.
// Computed with an integer shift rather than std::pow, so no floating-point
// round trip is involved and no <cmath> declaration is required.
// NOTE(review): lmul must be smaller than the bit width of Word; callers are
// presumably passing a small encoded field - confirm.
void Instr::setVlmul(Word lmul) {
  vlmul_ = Word{1} << lmul;
}
// Stores 2^(3 + sew) in vsew_ (so sew == 0 yields 8, sew == 1 yields 16, ...).
// Computed with an integer shift rather than std::pow, so no floating-point
// round trip is involved and no <cmath> declaration is required.
// NOTE(review): 3 + sew must be smaller than the bit width of Word.
void Instr::setVsew(Word sew) {
  vsew_ = Word{1} << (3 + sew);
}
// Stores 2^ediv in vediv_.
// Computed with an integer shift rather than std::pow, so no floating-point
// round trip is involved and no <cmath> declaration is required.
// NOTE(review): ediv must be smaller than the bit width of Word.
void Instr::setVediv(Word ediv) {
  vediv_ = Word{1} << ediv;
}

View file

@ -52,9 +52,12 @@ public:
Instr()
: opcode_(Opcode::NOP)
, nRsrc_(0)
, nPsrc_(0)
, hasImmSrc_(false)
, hasRDest_(false)
, is_FpDest_(false)
, is_VDest_(false)
, is_FpSrc_(0)
, is_VSrc_(0)
, func2_(0)
, func3_(0)
, func7_(0)
@ -65,20 +68,24 @@ public:
/* Setters used to "craft" the instruction. */
void setOpcode(Opcode opcode) { opcode_ = opcode; }
// Destination register: marks the instruction as having a register
// destination and records its number.
void setDestReg(RegNum destReg) { hasRDest_ = true; rdest_ = destReg; }
// Source register: stored in the next free slot of rsrc_ (slot index is the
// value of nRsrc_ before the increment), so sources keep their call order.
// NOTE(review): none of the source setters checks nRsrc_ against the size of
// rsrc_ before writing.
void setSrcReg(RegNum srcReg) { rsrc_[nRsrc_++] = srcReg; }
void setDestReg(int destReg) { hasRDest_ = true; rdest_ = destReg; }
void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; }
// Floating-point destination: like setDestReg, and additionally sets is_FpDest_.
void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; }
// Floating-point source: sets bit nRsrc_ of is_FpSrc_ for the slot about to be
// filled, then stores the register like setSrcReg.
void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
// Vector destination: like setDestReg, and additionally sets is_VDest_.
void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; }
// Vector source: sets bit nRsrc_ of is_VSrc_ for the slot about to be filled,
// then stores the register like setSrcReg.
void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
void setFunc3(Word func3) { func3_ = func3; }
void setFunc7(Word func7) { func7_ = func7; }
// Records the immediate operand and marks it as present.
void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; }
// Stores whether vset_imm is non-zero.
void setVsetImm(Word vset_imm) { if(vset_imm) vsetImm_ = true; else vsetImm_ = false; }
void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; }
void setVlsWidth(Word width) { vlsWidth_ = width; }
void setVmop(Word mop) { vMop_ = mop; }
void setVnf(Word nf) { vNf_ = nf; }
void setVmask(Word mask) { vmask_ = mask; }
void setVs3(Word vs) { vs3_ = vs; }
void setVlmul(Word lmul);
void setVsew(Word sew);
void setVediv(Word ediv);
// The three fields below are stored as powers of two:
// vlmul_ = 2^lmul, vsew_ = 2^(3+sew), vediv_ = 2^ediv.
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
void setFunc6(Word func6) { func6_ = func6; }
/* Getters used by encoders. */
@ -86,10 +93,10 @@ public:
Word getFunc3() const { return func3_; }
Word getFunc6() const { return func6_; }
Word getFunc7() const { return func7_; }
RegNum getNRSrc() const { return nRsrc_; }
RegNum getRSrc(RegNum i) const { return rsrc_[i]; }
int getNRSrc() const { return nRsrc_; }
int getRSrc(int i) const { return rsrc_[i]; }
bool hasRDest() const { return hasRDest_; }
RegNum getRDest() const { return rdest_; }
int getRDest() const { return rdest_; }
bool hasImm() const { return hasImmSrc_; }
Word getImm() const { return immsrc_; }
bool getVsetImm() const { return vsetImm_; }
@ -102,6 +109,12 @@ public:
Word getVsew() const { return vsew_; }
Word getVediv() const { return vediv_; }
// Register-kind predicates. The destination flags are plain booleans; the
// source flags are bitmasks in which bit i describes source slot i.
bool is_FpDest() const { return is_FpDest_; }
// True when bit i of is_FpSrc_ is set.
bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; }
bool is_VDest() const { return is_VDest_; }
// True when bit i of is_VSrc_ is set.
bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; }
private:
enum {
@ -110,15 +123,18 @@ private:
Opcode opcode_;
int nRsrc_;
int nPsrc_;
bool hasImmSrc_;
bool hasRDest_;
bool hasRDest_;
bool is_FpDest_;
bool is_VDest_;
int is_FpSrc_;
int is_VSrc_;
Word immsrc_;
Word func2_;
Word func3_;
Word func7_;
RegNum rsrc_[MAX_REG_SOURCES];
RegNum rdest_;
int rsrc_[MAX_REG_SOURCES];
int rdest_;
//Vector
bool vsetImm_;

View file

@ -15,8 +15,8 @@ using namespace vortex;
int main(int argc, char **argv) {
std::string archString("rv32i");
int num_cores(1);
std::string archString("rv32imf");
int num_cores(NUM_CORES * NUM_CLUSTERS);
int num_warps(NUM_WARPS);
int num_threads(NUM_THREADS);
std::string imgFileName;
@ -48,7 +48,7 @@ int main(int argc, char **argv) {
ArchDef arch(archString, num_cores, num_warps, num_threads);
Decoder decoder(arch);
MemoryUnit mu(4096, arch.getWordSize(), true);
MemoryUnit mu(4096, arch.wsize(), true);
RAM old_ram;
old_ram.loadHexImpl(imgFileName.c_str());
@ -59,7 +59,7 @@ int main(int argc, char **argv) {
std::vector<std::shared_ptr<Core>> cores(num_cores);
for (int i = 0; i < num_cores; ++i) {
cores[i] = std::make_shared<Core>(arch, decoder, mu);
cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
}
bool running;

View file

@ -1,78 +0,0 @@
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <fstream>
#include <stdlib.h>
#include <sys/stat.h>
#include "debug.h"
#include "types.h"
#include "core.h"
#include "args.h"
using namespace vortex;
int main(int argc, char **argv) {
std::string archString("rv32i");
int num_cores(1);
int num_warps(NUM_WARPS);
int num_threads(NUM_THREADS);
std::string imgFileName;
bool showHelp(false);
bool showStats(false);
/* Read the command line arguments. */
CommandLineArgFlag fh("-h", "--help", "", showHelp);
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archString);
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
CommandLineArgFlag fs("-s", "--stats", "", showStats);
CommandLineArg::readArgs(argc - 1, argv + 1);
if (showHelp || imgFileName.empty()) {
std::cout << "Vortex emulator command line arguments:\n"
" -i, --image <filename> Program RAM image\n"
" -c, --cores <num> Number of cores\n"
" -w, --warps <num> Number of warps\n"
" -t, --threads <num> Number of threads\n"
" -a, --arch <arch string> Architecture string\n"
" -s, --stats Print stats on exit.\n";
return 0;
}
ArchDef arch(archString, num_cores, num_warps, num_threads);
Decoder decoder(arch);
MemoryUnit mu(4096, arch.getWordSize(), true);
RAM old_ram;
old_ram.loadHexImpl(imgFileName.c_str());
mu.attach(old_ram, 0);
struct stat hello;
fstat(0, &hello);
std::vector<std::shared_ptr<Core>> cores(num_cores);
for (int i = 0; i < num_cores; ++i) {
cores[i] = std::make_shared<Core>(arch, decoder, mu);
}
bool running;
do {
running = false;
for (int i = 0; i < num_cores; ++i) {
if (!cores[i]->running())
continue;
running = true;
cores[i]->step();
}
} while (running);
return 0;
}

View file

@ -5,22 +5,27 @@ namespace vortex {
struct trace_inst_t {
// Warp step
bool valid_inst;
unsigned pc;
bool valid;
unsigned PC;
// Core scheduler
int wid;
// Encoder
int rs1;
int rs2;
int rs3;
int rd;
int irs1;
int irs2;
int ird;
//Encoder
int vs1;
int vs2;
int vd;
// Floating-point
int frs1;
int frs2;
int frs3;
int frd;
// Vector extension
int vrs1;
int vrs2;
int vrd;
// Instruction execute
bool is_lw;

View file

@ -1,20 +1,18 @@
#pragma once
#include <stdint.h>
#include <bitset>
#include <VX_config.h>
namespace vortex {
// Basic fixed-width aliases used throughout the simulator.
typedef uint8_t Byte;
// 32-bit unsigned word.
typedef uint32_t Word;
// Explicitly unsigned / signed 32-bit word variants.
typedef uint32_t Word_u;
typedef int32_t Word_s;
// Addresses and sizes are 32-bit unsigned values.
typedef Word_u Addr;
typedef Word_u Size;
typedef uint32_t Addr;
typedef uint32_t Size;
// Register and thread indices.
typedef unsigned RegNum;
typedef unsigned ThdNum;
// Fixed 32-bit bitset; the warp code indexes it by thread number.
typedef std::bitset<32> ThreadMask;
enum MemFlags {
RD_USR = 1,

View file

@ -12,15 +12,15 @@ Word vortex::signExt(Word w, Size bit, Word mask) {
return w;
}
void vortex::wordToBytes(Byte *b, Word_u w, Size wordSize) {
void vortex::wordToBytes(Byte *b, Word w, Size wordSize) {
while (wordSize--) {
*(b++) = w & 0xff;
w >>= 8;
}
}
Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
Word_u w = 0;
Word vortex::bytesToWord(const Byte *b, Size wordSize) {
Word w = 0;
b += wordSize-1;
while (wordSize--) {
w <<= 8;
@ -29,15 +29,15 @@ Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
return w;
}
Word_u vortex::flagsToWord(bool r, bool w, bool x) {
Word_u word = 0;
Word vortex::flagsToWord(bool r, bool w, bool x) {
Word word = 0;
if (r) word |= RD_USR;
if (w) word |= WR_USR;
if (x) word |= EX_USR;
return word;
}
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word_u f) {
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word f) {
r = f & RD_USR;
w = f & WR_USR;
x = f & EX_USR;
@ -49,10 +49,10 @@ Byte vortex::readByte(const std::vector<Byte> &b, Size &n) {
return b[n++];
}
Word_u vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
Word vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
if (b.size() - n < wordSize)
throw std::out_of_range("out of range");
Word_u w(0);
Word w(0);
n += wordSize;
// std::cout << "wordSize: " << wordSize << "\n";
for (Size i = 0; i < wordSize; i++) {

View file

@ -12,13 +12,13 @@ void unused(Args&&...) {}
Word signExt(Word w, Size bit, Word mask);
Word_u bytesToWord(const Byte *b, Size wordSize);
void wordToBytes(Byte *b, Word_u w, Size wordSize);
Word_u flagsToWord(bool r, bool w, bool x);
void wordToFlags(bool &r, bool &w, bool &x, Word_u f);
Word bytesToWord(const Byte *b, Size wordSize);
void wordToBytes(Byte *b, Word w, Size wordSize);
Word flagsToWord(bool r, bool w, bool x);
void wordToFlags(bool &r, bool &w, bool &x, Word f);
Byte readByte(const std::vector<Byte> &b, Size &n);
Word_u readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
Word readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
void writeByte(std::vector<Byte> &p, Size &n, Byte b);
void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w);

View file

@ -2,6 +2,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <assert.h>
#include "util.h"
#include "instr.h"
@ -11,87 +12,67 @@ using namespace vortex;
Warp::Warp(Core *core, Word id)
: id_(id)
, active_(false)
, core_(core)
, pc_(0x80000000)
, shadowPc_(0)
, activeThreads_(0)
, shadowActiveThreads_(0)
, shadowIReg_(core_->arch().getNumRegs())
, VLEN_(1024)
, spawned_(false)
, PC_(0x80000000)
, steps_(0)
, insts_(0)
, loads_(0)
, stores_(0) {
D(3, "Creating a new thread with PC: " << std::hex << pc_);
/* Build the register file. */
Word regNum(0);
for (Word j = 0; j < core_->arch().getNumThreads(); ++j) {
iRegFile_.push_back(std::vector<Reg<Word>>(0));
for (Word i = 0; i < core_->arch().getNumRegs(); ++i) {
iRegFile_[j].push_back(Reg<Word>(id, regNum++));
}
bool act = false;
if (j == 0)
act = true;
tmask_.push_back(act);
shadowTmask_.push_back(act);
}
tmask_.reset();
for (Word i = 0; i < (1 << 12); i++) {
csrs_.push_back(Reg<uint32_t>(id, regNum++));
}
/* Set initial register contents. */
iRegFile_[0][0] = (core_->arch().getNumThreads() << (core_->arch().getWordSize() * 8 / 2)) | id;
iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
csrs_.resize(core_->arch().num_csrs());
}
void Warp::step(trace_inst_t *trace_inst) {
assert(tmask_.any());
Size fetchPos(0);
Size decPos;
Size wordSize(core_->arch().getWordSize());
Size wordSize(core_->arch().wsize());
std::vector<Byte> fetchBuffer(wordSize);
if (activeThreads_ == 0)
return;
++steps_;
D(3, "current PC=0x" << std::hex << pc_);
D(3, "current PC=0x" << std::hex << PC_);
// std::cout << "pc: " << std::hex << pc << "\n";
trace_inst->pc = pc_;
// std::cout << "PC: " << std::hex << PC << "\n";
trace_inst->PC = PC_;
/* Fetch and decode. */
if (wordSize < sizeof(pc_))
pc_ &= ((1ll << (wordSize * 8)) - 1);
if (wordSize < sizeof(PC_))
PC_ &= ((1ll << (wordSize * 8)) - 1);
unsigned fetchSize = 4;
fetchBuffer.resize(fetchSize);
Word fetched = core_->mem().fetch(pc_ + fetchPos, 0);
Word fetched = core_->mem().fetch(PC_ + fetchPos, 0);
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
decPos = 0;
std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst);
// Update pc
pc_ += decPos;
// Update PC
PC_ += decPos;
// Execute
this->execute(*instr, trace_inst);
// At Debug Level 3, print debug info after each instruction.
D(3, "Register state:");
for (unsigned i = 0; i < iRegFile_[0].size(); ++i) {
D_RAW(" %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
for (unsigned j = 0; j < (activeThreads_); ++j)
D_RAW(' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
D_RAW('(' << shadowIReg_[i] << ')' << std::endl);
D(4, "Register state:");
for (int i = 0; i < core_->arch().num_regs(); ++i) {
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
for (int j = 0; j < core_->arch().num_threads(); ++j) {
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
}
DPN(4, std::endl);
}
DPH(3, "Thread mask:");
for (unsigned i = 0; i < tmask_.size(); ++i)
for (int i = 0; i < core_->arch().num_threads(); ++i)
DPN(3, " " << tmask_[i]);
DPN(3, "\n");
}

View file

@ -7,69 +7,25 @@
namespace vortex {
// A single register holding a value of type T, tagged with the id of the
// owning CPU and its register number.
//
// Writes through operator= are dropped when the register number is 0, so
// register 0 keeps the value it was constructed with. Reads and writes pass
// through the doRead()/doWrite() hooks, which are currently empty.
template <typename T>
class Reg {
public:
  // Register 0 of CPU 0, holding 0.
  Reg()
    : value_(0), cpuId_(0), regNum_(0) {}

  // Register n of CPU c, holding 0.
  Reg(Word c, Word n)
    : value_(0), cpuId_(c), regNum_(n) {}

  // Register n of CPU c, holding v.
  Reg(Word c, Word n, T v)
    : value_(v), cpuId_(c), regNum_(n) {}

  // Direct access to the stored value; does not go through doRead().
  const T &value() const {
    return value_;
  }

  // Stores r unless this is register number 0, in which case the write is ignored.
  Reg &operator=(T r) {
    if (regNum_) {
      value_ = r;
      doWrite();
    }
    return *this;
  }

  // Reads the stored value.
  operator T() const {
    doRead();
    return value_;
  }

  // Keeps only the low s bytes of the value and clears the rest.
  // s >= sizeof(Word) leaves the value untouched; s == 0 clears it.
  void trunc(Size s) {
    if (s >= sizeof(Word))
      return;
    // Build the mask in Word width. The previous form shifted a 64-bit
    // all-ones constant and then narrowed it to Word, which always produced
    // an all-ones mask, so nothing was ever truncated.
    const Word mask = (Word(1) << (s * 8)) - 1;
    value_ &= mask;
  }

private:
  T value_;
  Word cpuId_, regNum_;

  // Access hooks; intentionally empty.
  void doWrite() const {}
  void doRead() const {}
};
///////////////////////////////////////////////////////////////////////////////
struct DomStackEntry {
DomStackEntry(
unsigned p,
const std::vector<std::vector<Reg<Word>>> &m,
std::vector<bool> &tm,
Word pc
) : pc(pc)
, fallThrough(false)
, uni(false) {
for (unsigned i = 0; i < m.size(); ++i) {
tmask.push_back(!bool(m[i][p]) && tm[i]);
}
}
DomStackEntry(const ThreadMask &tmask, Word PC)
: tmask(tmask)
, PC(PC)
, fallThrough(false)
, unanimous(false)
{}
DomStackEntry(const std::vector<bool> &tmask)
: tmask(tmask), fallThrough(true), uni(false) {}
DomStackEntry(const ThreadMask &tmask)
: tmask(tmask)
, PC(0)
, fallThrough(true)
, unanimous(false)
{}
std::vector<bool> tmask;
Word pc;
ThreadMask tmask;
Word PC;
bool fallThrough;
bool uni;
bool unanimous;
};
struct vtype {
@ -86,11 +42,13 @@ class trace_inst_t;
class Warp {
public:
Warp(Core *core, Word id = 0);
void step(trace_inst_t *);
bool running() const {
return (activeThreads_ != 0);
bool active() const {
return tmask_.any();
}
std::size_t getActiveThreads() const {
return tmask_.count();
}
void printStats() const;
@ -103,68 +61,40 @@ public:
return id_;
}
Word get_pc() const {
return pc_;
Word getPC() const {
return PC_;
}
void set_pc(Word pc) {
pc_ = pc;
}
void setActiveThreads(Size activeThreads) {
activeThreads_ = activeThreads;
}
Size getActiveThreads() const {
return activeThreads_;
}
void setSpawned(bool spawned) {
spawned_ = spawned;
void setPC(Word PC) {
PC_ = PC;
}
void setTmask(size_t index, bool value) {
tmask_[index] = value;
}
void step(trace_inst_t *);
private:
void execute(Instr &instr, trace_inst_t *);
struct MemAccess {
MemAccess(bool w, Word a)
: wr(w), addr(a) {}
bool wr;
Word addr;
};
std::vector<MemAccess> memAccesses_;
Word id_;
bool active_;
Core *core_;
Word pc_;
Word shadowPc_;
Size activeThreads_;
Size shadowActiveThreads_;
std::vector<std::vector<Reg<Word>>> iRegFile_;
std::vector<std::vector<Reg<Word>>> fRegFile_;
std::vector<Reg<uint32_t>> csrs_;
std::vector<bool> tmask_;
std::vector<bool> shadowTmask_;
Word PC_;
ThreadMask tmask_;
std::vector<std::vector<Word>> iRegFile_;
std::vector<std::vector<Word>> fRegFile_;
std::vector<std::vector<Byte>> vRegFile_;
std::vector<Word> csrs_;
std::stack<DomStackEntry> domStack_;
std::vector<Word> shadowIReg_;
std::vector<Word> shadowFReg_;
struct vtype vtype_; // both of them are XLEN WIDE
int vl_; // both of them are XLEN WIDE
Word VLEN_; // total vector length
std::vector<std::vector<Reg<char *>>> vregFile_; // 32 vector registers
bool spawned_;
struct vtype vtype_;
int vl_;
unsigned long steps_;
unsigned long insts_;
unsigned long loads_;