mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
simX refactoring
This commit is contained in:
parent
4fe345f269
commit
a8452483fe
20 changed files with 1198 additions and 1789 deletions
|
@ -185,7 +185,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
*value = IMPLEMENTATION_ID;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = NUM_CORES;
|
||||
*value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
*value = NUM_WARPS;
|
||||
|
|
|
@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
|||
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
|
@ -21,7 +21,7 @@ LDFLAGS += -shared -pthread
|
|||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -144,19 +144,18 @@ private:
|
|||
void run() {
|
||||
vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS);
|
||||
vortex::Decoder decoder(arch);
|
||||
vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
vortex::MemoryUnit mu(PAGE_SIZE, arch.wsize(), true);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES);
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu);
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores(arch.num_cores());
|
||||
for (int i = 0; i < arch.num_cores(); ++i) {
|
||||
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
|
||||
do {
|
||||
running = false;
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
for (int i = 0; i < arch.num_cores(); ++i) {
|
||||
if (!cores[i]->running())
|
||||
continue;
|
||||
running = true;
|
||||
|
@ -236,7 +235,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
*value = IMPLEMENTATION_ID;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = NUM_CORES;
|
||||
*value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
*value = NUM_WARPS;
|
||||
|
|
|
@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl
|
|||
|
||||
PROJECT = simX
|
||||
|
||||
SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp main.cpp
|
||||
SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
160
simX/archdef.h
160
simX/archdef.h
|
@ -11,156 +11,56 @@ namespace vortex {
|
|||
|
||||
class ArchDef {
|
||||
public:
|
||||
struct Undefined {};
|
||||
|
||||
ArchDef(const std::string &s,
|
||||
ArchDef(const std::string &/*arch*/,
|
||||
int num_cores,
|
||||
int num_warps,
|
||||
int num_threads) {
|
||||
std::istringstream iss(s.c_str());
|
||||
wordSize_ = 4;
|
||||
encChar_ = 'w';
|
||||
numRegs_ = 32;
|
||||
numPRegs_ = 0;
|
||||
numCores_ = num_cores;
|
||||
numWarps_ = num_warps;
|
||||
numThreads_ = num_threads;
|
||||
extent_ = EXT_END;
|
||||
int num_threads) {
|
||||
wsize_ = 4;
|
||||
vsize_ = 16;
|
||||
num_regs_ = 32;
|
||||
num_csrs_ = 4096;
|
||||
num_cores_ = num_cores;
|
||||
num_warps_ = num_warps;
|
||||
num_threads_ = num_threads;
|
||||
}
|
||||
|
||||
operator std::string () const {
|
||||
if (extent_ == EXT_NULL)
|
||||
return "";
|
||||
|
||||
std::ostringstream oss;
|
||||
if (extent_ >= EXT_WORDSIZE) oss << wordSize_;
|
||||
if (extent_ >= EXT_ENC ) oss << encChar_;
|
||||
if (extent_ >= EXT_REGS ) oss << numRegs_;
|
||||
if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_;
|
||||
if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_;
|
||||
if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_;
|
||||
if (extent_ >= EXT_CORES ) oss << '/' << numCores_;
|
||||
|
||||
return oss.str();
|
||||
int wsize() const {
|
||||
return wsize_;
|
||||
}
|
||||
|
||||
bool operator==(const ArchDef &r) const {
|
||||
Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_);
|
||||
|
||||
// Can't be equal if we can't specify a binary encoding at all.
|
||||
if (minExtent < EXT_PREGS)
|
||||
return false;
|
||||
|
||||
if (minExtent >= EXT_WORDSIZE) {
|
||||
if (wordSize_!=r.wordSize_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_ENC) {
|
||||
if (encChar_ != r.encChar_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_REGS) {
|
||||
if (numRegs_ != r.numRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_PREGS) {
|
||||
if (numPRegs_ != r.numPRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_THREADS) {
|
||||
if (numThreads_ != r.numThreads_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_WARPS) {
|
||||
if (numWarps_ != r.numWarps_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_CORES) {
|
||||
if (numCores_ != r.numCores_)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
int vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
bool operator!=(const ArchDef &r) const {
|
||||
return !(*this == r);
|
||||
int num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
Size getWordSize() const {
|
||||
if (extent_ < EXT_WORDSIZE)
|
||||
throw Undefined();
|
||||
return wordSize_;
|
||||
int num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
char getEncChar() const {
|
||||
if ((extent_ < EXT_ENC) || (encChar_ == 'x'))
|
||||
throw Undefined();
|
||||
return encChar_;
|
||||
int num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
RegNum getNumRegs() const {
|
||||
if (extent_ < EXT_REGS)
|
||||
throw Undefined();
|
||||
return numRegs_;
|
||||
int num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
RegNum getNumPRegs() const {
|
||||
if (extent_ < EXT_PREGS)
|
||||
throw Undefined();
|
||||
return numPRegs_;
|
||||
}
|
||||
|
||||
ThdNum getNumThreads() const {
|
||||
if (extent_ < EXT_THREADS)
|
||||
throw Undefined();
|
||||
return numThreads_;
|
||||
}
|
||||
|
||||
ThdNum getNumWarps() const {
|
||||
if (extent_ < EXT_WARPS)
|
||||
throw Undefined();
|
||||
return numWarps_;
|
||||
}
|
||||
|
||||
ThdNum getNumCores() const {
|
||||
if (extent_ < EXT_CORES)
|
||||
throw Undefined();
|
||||
return numCores_;
|
||||
}
|
||||
|
||||
bool is_cpu_mode() const {
|
||||
return cpu_mode_;
|
||||
int num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
private:
|
||||
enum Extent {
|
||||
EXT_NULL,
|
||||
EXT_WORDSIZE,
|
||||
EXT_ENC,
|
||||
EXT_REGS,
|
||||
EXT_PREGS,
|
||||
EXT_THREADS,
|
||||
EXT_WARPS,
|
||||
EXT_CORES,
|
||||
EXT_END
|
||||
};
|
||||
|
||||
Extent extent_;
|
||||
Size wordSize_;
|
||||
ThdNum numThreads_;
|
||||
ThdNum numWarps_;
|
||||
ThdNum numCores_;
|
||||
RegNum numRegs_;
|
||||
ThdNum numPRegs_;
|
||||
char encChar_;
|
||||
bool cpu_mode_;
|
||||
int wsize_;
|
||||
int vsize_;
|
||||
int num_regs_;
|
||||
int num_csrs_;
|
||||
int num_threads_;
|
||||
int num_warps_;
|
||||
int num_cores_;
|
||||
};
|
||||
|
||||
}
|
381
simX/core.cpp
381
simX/core.cpp
|
@ -1,10 +1,7 @@
|
|||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
|
||||
// #define USE_DEBUG 7
|
||||
// #define PRINT_ACTIVE_THREADS
|
||||
|
||||
#include <assert.h>
|
||||
#include "types.h"
|
||||
#include "util.h"
|
||||
#include "archdef.h"
|
||||
|
@ -14,21 +11,25 @@
|
|||
#include "debug.h"
|
||||
|
||||
#define INIT_TRACE(trace_inst) \
|
||||
trace_inst.valid_inst = false; \
|
||||
trace_inst.pc = 0; \
|
||||
trace_inst.valid = false; \
|
||||
trace_inst.PC = 0; \
|
||||
trace_inst.wid = schedule_w_; \
|
||||
trace_inst.rs1 = -1; \
|
||||
trace_inst.rs2 = -1; \
|
||||
trace_inst.rd = -1; \
|
||||
trace_inst.vs1 = -1; \
|
||||
trace_inst.vs2 = -1; \
|
||||
trace_inst.vd = -1; \
|
||||
trace_inst.irs1 = -1; \
|
||||
trace_inst.irs2 = -1; \
|
||||
trace_inst.frs1 = -1; \
|
||||
trace_inst.frs2 = -1; \
|
||||
trace_inst.frs3 = -1; \
|
||||
trace_inst.frd = -1; \
|
||||
trace_inst.ird = -1; \
|
||||
trace_inst.vrs1 = -1; \
|
||||
trace_inst.vrs2 = -1; \
|
||||
trace_inst.vrd = -1; \
|
||||
trace_inst.is_lw = false; \
|
||||
trace_inst.is_sw = false; \
|
||||
if (trace_inst.mem_addresses != NULL) \
|
||||
free(trace_inst.mem_addresses); \
|
||||
trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \
|
||||
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++) \
|
||||
for (int tid = 0; tid < arch_.num_threads(); tid++) \
|
||||
trace_inst.mem_addresses[tid] = 0xdeadbeef; \
|
||||
trace_inst.mem_stall_cycles = 0; \
|
||||
trace_inst.fetch_stall_cycles = 0; \
|
||||
|
@ -37,18 +38,22 @@
|
|||
trace_inst.stalled = false;
|
||||
|
||||
#define CPY_TRACE(drain, source) \
|
||||
drain.valid_inst = source.valid_inst; \
|
||||
drain.pc = source.pc; \
|
||||
drain.valid = source.valid; \
|
||||
drain.PC = source.PC; \
|
||||
drain.wid = source.wid; \
|
||||
drain.rs1 = source.rs1; \
|
||||
drain.rs2 = source.rs2; \
|
||||
drain.rd = source.rd; \
|
||||
drain.vs1 = source.vs1; \
|
||||
drain.vs2 = source.vs2; \
|
||||
drain.vd = source.vd; \
|
||||
drain.irs1 = source.irs1; \
|
||||
drain.irs2 = source.irs2; \
|
||||
drain.ird = source.ird; \
|
||||
drain.frs1 = source.frs1; \
|
||||
drain.frs2 = source.frs2; \
|
||||
drain.frs3 = source.frs3; \
|
||||
drain.frd = source.frd; \
|
||||
drain.vrs1 = source.vrs1; \
|
||||
drain.vrs2 = source.vrs2; \
|
||||
drain.vrd = source.vrd; \
|
||||
drain.is_lw = source.is_lw; \
|
||||
drain.is_sw = source.is_sw; \
|
||||
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++)\
|
||||
for (int tid = 0; tid < arch_.num_threads(); tid++) \
|
||||
drain.mem_addresses[tid] = source.mem_addresses[tid]; \
|
||||
drain.mem_stall_cycles = source.mem_stall_cycles; \
|
||||
drain.fetch_stall_cycles = source.fetch_stall_cycles; \
|
||||
|
@ -60,17 +65,17 @@ using namespace vortex;
|
|||
|
||||
void printTrace(trace_inst_t *trace, const char *stage_name) {
|
||||
__unused(trace, stage_name);
|
||||
D(3, stage_name << ": valid=" << trace->valid_inst);
|
||||
D(3, stage_name << ": PC=" << std::hex << trace->pc << std::dec);
|
||||
D(3, stage_name << ": wid=" << trace->wid);
|
||||
D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2);
|
||||
D(3, stage_name << ": is_lw=" << trace->is_lw);
|
||||
D(3, stage_name << ": is_sw=" << trace->is_sw);
|
||||
D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
|
||||
D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
|
||||
D(3, stage_name << ": stall_warp=" << trace->stall_warp);
|
||||
D(3, stage_name << ": wspawn=" << trace->wspawn);
|
||||
D(3, stage_name << ": stalled=" << trace->stalled);
|
||||
D(4, stage_name << ": valid=" << trace->valid);
|
||||
D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec);
|
||||
D(4, stage_name << ": wid=" << trace->wid);
|
||||
D(4, stage_name << ": rd=" << trace->ird << ", rs1=" << trace->irs1 << ", trs2=" << trace->irs2);
|
||||
D(4, stage_name << ": is_lw=" << trace->is_lw);
|
||||
D(4, stage_name << ": is_sw=" << trace->is_sw);
|
||||
D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
|
||||
D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
|
||||
D(4, stage_name << ": stall_warp=" << trace->stall_warp);
|
||||
D(4, stage_name << ": wspawn=" << trace->wspawn);
|
||||
D(4, stage_name << ": stalled=" << trace->stalled);
|
||||
}
|
||||
|
||||
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
|
@ -79,8 +84,7 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
|||
, decoder_(decoder)
|
||||
, mem_(mem)
|
||||
, steps_(0)
|
||||
, num_instructions_(0) {
|
||||
release_warp_ = false;
|
||||
, num_insts_(0) {
|
||||
foundSchedule_ = true;
|
||||
schedule_w_ = 0;
|
||||
|
||||
|
@ -98,23 +102,17 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
|||
INIT_TRACE(inst_in_lsu_);
|
||||
INIT_TRACE(inst_in_wb_);
|
||||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
stalled_warps_[i] = false;
|
||||
for (int j = 0; j < 32; j++) {
|
||||
renameTable_[i][j] = true;
|
||||
}
|
||||
iRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
|
||||
fRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
|
||||
vRenameTable_.resize(arch.num_regs(), false);
|
||||
|
||||
stalled_warps_.resize(arch.num_warps(), false);
|
||||
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
warps_.emplace_back(this, i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
vecRenameTable_[i] = true;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < arch_.getNumWarps(); ++i) {
|
||||
warps_.push_back(Warp(this, i));
|
||||
}
|
||||
|
||||
warps_[0].setActiveThreads(1);
|
||||
warps_[0].setSpawned(true);
|
||||
warps_[0].setTmask(0, true);
|
||||
}
|
||||
|
||||
Core::~Core() {
|
||||
|
@ -125,32 +123,20 @@ void Core::step() {
|
|||
D(3, "###########################################################");
|
||||
|
||||
steps_++;
|
||||
D(3, "cycle: " << steps_);
|
||||
D(3, std::dec << "Core" << id_ << ": cycle: " << steps_);
|
||||
|
||||
DPH(3, "stalled warps:");
|
||||
for (ThdNum widd = 0; widd < arch_.getNumWarps(); widd++) {
|
||||
DPN(3, " " << stalled_warps_[widd]);
|
||||
for (int i = 0; i < arch_.num_warps(); i++) {
|
||||
DPN(3, " " << stalled_warps_[i]);
|
||||
}
|
||||
DPN(3, "\n");
|
||||
|
||||
// cout << "About to call writeback" << std::endl;
|
||||
this->writeback();
|
||||
// cout << "About to call load_store" << std::endl;
|
||||
this->load_store();
|
||||
// cout << "About to call execute_unit" << std::endl;
|
||||
this->execute_unit();
|
||||
// cout << "About to call scheduler" << std::endl;
|
||||
this->scheduler();
|
||||
// cout << "About to call decode" << std::endl;
|
||||
this->decode();
|
||||
// D(3, "About to call fetch" << std::flush);
|
||||
this->fetch();
|
||||
// D(3, "Finished fetch" << std::flush);
|
||||
|
||||
if (release_warp_) {
|
||||
release_warp_ = false;
|
||||
stalled_warps_[release_warp_num_] = false;
|
||||
}
|
||||
|
||||
DPN(3, std::flush);
|
||||
}
|
||||
|
@ -161,10 +147,8 @@ void Core::warpScheduler() {
|
|||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
// round robin scheduling
|
||||
next_warp = (next_warp + 1) % warps_.size();
|
||||
|
||||
bool has_active_threads = (warps_[next_warp].getActiveThreads() > 0);
|
||||
bool has_active_threads = warps_[next_warp].active();
|
||||
bool stalled = stalled_warps_[next_warp];
|
||||
|
||||
if (has_active_threads && !stalled) {
|
||||
foundSchedule_ = true;
|
||||
break;
|
||||
|
@ -174,35 +158,28 @@ void Core::warpScheduler() {
|
|||
}
|
||||
|
||||
void Core::fetch() {
|
||||
|
||||
// D(-1, "Found schedule: " << foundSchedule_);
|
||||
|
||||
if ((!inst_in_scheduler_.stalled)
|
||||
&& (inst_in_fetch_.fetch_stall_cycles == 0)) {
|
||||
// CPY_TRACE(inst_in_decode_, inst_in_fetch_);
|
||||
// if (warps_[schedule_w_].activeThreads)
|
||||
{
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
|
||||
if (foundSchedule_) {
|
||||
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
|
||||
if (foundSchedule_) {
|
||||
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
|
||||
num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads();
|
||||
|
||||
num_instructions_ = num_instructions_ + warps_[schedule_w_].getActiveThreads();
|
||||
warps_[schedule_w_].step(&inst_in_fetch_);
|
||||
warps_[schedule_w_].step(&inst_in_fetch_);
|
||||
|
||||
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
|
||||
}
|
||||
|
||||
this->getCacheDelays(&inst_in_fetch_);
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
stalled_warps_[inst_in_fetch_.wid] = true;
|
||||
}
|
||||
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
|
||||
}
|
||||
|
||||
this->getCacheDelays(&inst_in_fetch_);
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
stalled_warps_[inst_in_fetch_.wid] = true;
|
||||
}
|
||||
this->warpScheduler();
|
||||
}
|
||||
this->warpScheduler();
|
||||
} else {
|
||||
inst_in_fetch_.stalled = false;
|
||||
if (inst_in_fetch_.fetch_stall_cycles > 0)
|
||||
|
@ -218,7 +195,6 @@ void Core::decode() {
|
|||
CPY_TRACE(inst_in_decode_, inst_in_fetch_);
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
}
|
||||
//printTrace(&inst_in_decode_, "Decode");
|
||||
}
|
||||
|
||||
void Core::scheduler() {
|
||||
|
@ -226,136 +202,162 @@ void Core::scheduler() {
|
|||
CPY_TRACE(inst_in_scheduler_, inst_in_decode_);
|
||||
INIT_TRACE(inst_in_decode_);
|
||||
}
|
||||
//printTrace(&inst_in_scheduler_, "Scheduler");
|
||||
}
|
||||
|
||||
void Core::load_store() {
|
||||
if ((inst_in_lsu_.mem_stall_cycles > 0) || (inst_in_lsu_.stalled)) {
|
||||
if ((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) {
|
||||
// LSU currently busy
|
||||
if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
}
|
||||
} else {
|
||||
// LSU not busy
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
|
||||
// Scheduler has LSU inst
|
||||
bool scheduler_srcs_ready = true;
|
||||
if (inst_in_scheduler_.rs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
|
||||
}
|
||||
if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw)
|
||||
return;
|
||||
|
||||
if (inst_in_scheduler_.rs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
|
||||
}
|
||||
// Scheduler has LSU inst
|
||||
bool scheduler_srcs_busy = false;
|
||||
|
||||
if (inst_in_scheduler_.vs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
|
||||
}
|
||||
if (inst_in_scheduler_.irs1 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_ready) {
|
||||
if (inst_in_scheduler_.rd != -1)
|
||||
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
|
||||
if (inst_in_scheduler_.rd != -1)
|
||||
vecRenameTable_[inst_in_scheduler_.vd] = false;
|
||||
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
} else {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
// INIT_TRACE(inst_in_lsu_);
|
||||
}
|
||||
} else {
|
||||
// INIT_TRACE(inst_in_lsu_);
|
||||
if (inst_in_scheduler_.irs2 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs3 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vrs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_busy) {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
} else {
|
||||
if (inst_in_scheduler_.ird > 0)
|
||||
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
|
||||
|
||||
if (inst_in_scheduler_.frd >= 0)
|
||||
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
|
||||
|
||||
if (inst_in_scheduler_.vrd >= 0)
|
||||
vRenameTable_[inst_in_scheduler_.vrd] = true;
|
||||
|
||||
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
}
|
||||
}
|
||||
|
||||
if (inst_in_lsu_.mem_stall_cycles > 0)
|
||||
inst_in_lsu_.mem_stall_cycles--;
|
||||
|
||||
//printTrace(&inst_in_lsu_, "LSU");
|
||||
}
|
||||
|
||||
void Core::execute_unit() {
|
||||
// EXEC is always not busy
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
|
||||
// Not an execute instruction
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
} else {
|
||||
bool scheduler_srcs_ready = true;
|
||||
if (inst_in_scheduler_.rs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
|
||||
// cout << "Rename RS1: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1] << " wid: " << inst_in_scheduler_.wid << '\n';
|
||||
}
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)
|
||||
return;
|
||||
|
||||
bool scheduler_srcs_busy = false;
|
||||
|
||||
if (inst_in_scheduler_.rs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
|
||||
// cout << "Rename RS2: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2] << " wid: " << inst_in_scheduler_.wid << '\n';
|
||||
}
|
||||
|
||||
// cout << "About to check vs*\n" << std::flush;
|
||||
if (inst_in_scheduler_.vs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
|
||||
}
|
||||
// cout << "Finished sources\n" << std::flush;
|
||||
|
||||
if (scheduler_srcs_ready) {
|
||||
if (inst_in_scheduler_.rd != -1) {
|
||||
// cout << "rename setting rd: " << inst_in_scheduler_.rd << " to not useabel wid: " << inst_in_scheduler_.wid << '\n';
|
||||
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
|
||||
}
|
||||
|
||||
// cout << "About to check vector wb: " << inst_in_scheduler_.vd << "\n" << std::flush;
|
||||
if (inst_in_scheduler_.vd != -1) {
|
||||
vecRenameTable_[inst_in_scheduler_.vd] = false;
|
||||
}
|
||||
// cout << "Finished wb checking" << "\n" << std::flush;
|
||||
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
// cout << "Finished trace copying and clearning" << "\n" << std::flush;
|
||||
} else {
|
||||
D(3, "Execute: srcs not ready!");
|
||||
inst_in_scheduler_.stalled = true;
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
}
|
||||
if (inst_in_scheduler_.irs1 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
|
||||
}
|
||||
|
||||
//printTrace(&inst_in_exe_, "EXE");
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
if (inst_in_scheduler_.irs2 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs3 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_busy) {
|
||||
D(3, "Execute: srcs not ready!");
|
||||
inst_in_scheduler_.stalled = true;
|
||||
} else {
|
||||
if (inst_in_scheduler_.ird > 0) {
|
||||
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frd >= 0) {
|
||||
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrd >= 0) {
|
||||
vRenameTable_[inst_in_scheduler_.vrd] = true;
|
||||
}
|
||||
|
||||
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
}
|
||||
}
|
||||
|
||||
void Core::writeback() {
|
||||
if (inst_in_wb_.rd > 0)
|
||||
renameTable_[inst_in_wb_.wid][inst_in_wb_.rd] = true;
|
||||
if (inst_in_wb_.vd > 0)
|
||||
vecRenameTable_[inst_in_wb_.vd] = true;
|
||||
if (inst_in_wb_.ird > 0) {
|
||||
iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.frd >= 0) {
|
||||
fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.vrd >= 0) {
|
||||
vRenameTable_[inst_in_wb_.vrd] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.stall_warp) {
|
||||
stalled_warps_[inst_in_wb_.wid] = false;
|
||||
// release_warp_ = true;
|
||||
// release_warp_num_ = inst_in_wb_.wid;
|
||||
}
|
||||
|
||||
INIT_TRACE(inst_in_wb_);
|
||||
|
||||
bool serviced_exe = false;
|
||||
if ((inst_in_exe_.rd > 0) || (inst_in_exe_.stall_warp)) {
|
||||
if ((inst_in_exe_.ird > 0)
|
||||
|| (inst_in_exe_.frd >= 0)
|
||||
|| (inst_in_exe_.vrd >= 0)
|
||||
|| (inst_in_exe_.stall_warp)) {
|
||||
CPY_TRACE(inst_in_wb_, inst_in_exe_);
|
||||
INIT_TRACE(inst_in_exe_);
|
||||
serviced_exe = true;
|
||||
// cout << "WRITEBACK SERVICED EXE\n";
|
||||
}
|
||||
|
||||
if (inst_in_lsu_.is_sw) {
|
||||
INIT_TRACE(inst_in_lsu_);
|
||||
} else {
|
||||
if (((inst_in_lsu_.rd > 0) || (inst_in_lsu_.vd > 0)) && (inst_in_lsu_.mem_stall_cycles == 0)) {
|
||||
if (((inst_in_lsu_.ird > 0)
|
||||
|| (inst_in_lsu_.frd >= 0)
|
||||
|| (inst_in_lsu_.vrd >= 0))
|
||||
&& (inst_in_lsu_.mem_stall_cycles == 0)) {
|
||||
if (serviced_exe) {
|
||||
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
|
||||
// Stalling LSU because EXE is busy
|
||||
inst_in_lsu_.stalled = true;
|
||||
} else {
|
||||
CPY_TRACE(inst_in_wb_, inst_in_lsu_);
|
||||
|
@ -366,27 +368,28 @@ void Core::writeback() {
|
|||
}
|
||||
|
||||
void Core::getCacheDelays(trace_inst_t *trace_inst) {
|
||||
trace_inst->fetch_stall_cycles += 3;
|
||||
trace_inst->fetch_stall_cycles += 1;
|
||||
if (trace_inst->is_sw || trace_inst->is_lw) {
|
||||
trace_inst->mem_stall_cycles += 5;
|
||||
trace_inst->mem_stall_cycles += 3;
|
||||
}
|
||||
}
|
||||
|
||||
bool Core::running() const {
|
||||
bool stages_have_valid = inst_in_fetch_.valid_inst
|
||||
|| inst_in_decode_.valid_inst
|
||||
|| inst_in_scheduler_.valid_inst
|
||||
|| inst_in_lsu_.valid_inst
|
||||
|| inst_in_exe_.valid_inst
|
||||
|| inst_in_wb_.valid_inst;
|
||||
bool stages_have_valid = inst_in_fetch_.valid
|
||||
|| inst_in_decode_.valid
|
||||
|| inst_in_scheduler_.valid
|
||||
|| inst_in_lsu_.valid
|
||||
|| inst_in_exe_.valid
|
||||
|| inst_in_wb_.valid;
|
||||
|
||||
if (stages_have_valid)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0; i < warps_.size(); ++i)
|
||||
if (warps_[i].running()) {
|
||||
for (unsigned i = 0; i < warps_.size(); ++i) {
|
||||
if (warps_[i].active()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
15
simX/core.h
15
simX/core.h
|
@ -60,8 +60,8 @@ public:
|
|||
return interruptEntry_;
|
||||
}
|
||||
|
||||
unsigned long num_instructions() const {
|
||||
return num_instructions_;
|
||||
unsigned long num_insts() const {
|
||||
return num_insts_;
|
||||
}
|
||||
|
||||
unsigned long num_steps() const {
|
||||
|
@ -70,9 +70,10 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
bool renameTable_[32][32];
|
||||
bool vecRenameTable_[32];
|
||||
bool stalled_warps_[32];
|
||||
std::vector<std::vector<bool>> iRenameTable_;
|
||||
std::vector<std::vector<bool>> fRenameTable_;
|
||||
std::vector<bool> vRenameTable_;
|
||||
std::vector<bool> stalled_warps_;
|
||||
bool foundSchedule_;
|
||||
|
||||
Word id_;
|
||||
|
@ -83,10 +84,8 @@ private:
|
|||
std::unordered_map<Word, std::set<Warp *>> barriers_;
|
||||
int schedule_w_;
|
||||
uint64_t steps_;
|
||||
uint64_t num_instructions_;
|
||||
uint64_t num_insts_;
|
||||
Word interruptEntry_;
|
||||
bool release_warp_;
|
||||
int release_warp_num_;
|
||||
|
||||
trace_inst_t inst_in_fetch_;
|
||||
trace_inst_t inst_in_decode_;
|
||||
|
|
12
simX/debug.h
12
simX/debug.h
|
@ -1,6 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
//#define USE_DEBUG 9
|
||||
#define USE_DEBUG 3
|
||||
#define DEBUG_HEADER << "DEBUG "
|
||||
//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": "
|
||||
|
||||
#ifdef USE_DEBUG
|
||||
|
||||
|
@ -11,13 +13,13 @@
|
|||
|
||||
#define D(lvl, x) do { \
|
||||
if ((lvl) <= USE_DEBUG) { \
|
||||
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl; \
|
||||
std::cout DEBUG_HEADER << x << std::endl; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define DPH(lvl, x) do { \
|
||||
if ((lvl) <= USE_DEBUG) { \
|
||||
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x; \
|
||||
std::cout DEBUG_HEADER << x; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
@ -27,10 +29,6 @@
|
|||
} \
|
||||
} while(0)
|
||||
|
||||
#define D_RAW(x) do { \
|
||||
std::cout << x; \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define DX(x)
|
||||
|
|
163
simX/decode.cpp
163
simX/decode.cpp
|
@ -54,7 +54,7 @@ std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
|
|||
}
|
||||
|
||||
Decoder::Decoder(const ArchDef &arch) {
|
||||
inst_s_ = arch.getWordSize() * 8;
|
||||
inst_s_ = arch.wsize() * 8;
|
||||
opcode_s_ = 7;
|
||||
reg_s_ = 5;
|
||||
func2_s_ = 2;
|
||||
|
@ -94,7 +94,11 @@ Decoder::Decoder(const ArchDef &arch) {
|
|||
v_imm_mask_ = 0x7ff;
|
||||
}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, trace_inst_t *trace_inst) {
|
||||
std::shared_ptr<Instr> Decoder::decode(
|
||||
const std::vector<Byte> &v,
|
||||
Size &idx,
|
||||
trace_inst_t *trace_inst)
|
||||
{
|
||||
Word code(readWord(v, idx, inst_s_ / 8));
|
||||
|
||||
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
|
||||
|
@ -107,12 +111,13 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
|||
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
|
||||
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
|
||||
|
||||
InstType curInstType = sc_instTable.at(op).iType; // get current inst type
|
||||
if (op == Opcode::FL || op == Opcode::FS) { // need to find out whether it is vector or floating point inst
|
||||
InstType curInstType = sc_instTable.at(op).iType;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
// need to find out whether it is vector or floating point inst
|
||||
Word width_bits = (code >> shift_func3_) & func3_mask_;
|
||||
if ((width_bits == 0x1) || (width_bits == 0x2)
|
||||
|| (width_bits == 0x3) || (width_bits == 0x4)) {
|
||||
curInstType = (op == Opcode::FL)? InstType::I_TYPE : InstType::S_TYPE;
|
||||
curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,52 +127,50 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
|||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
if (op == Opcode::FCI) {
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
} else {
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
if (op == Opcode::FCI || op == Opcode::FL) {
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
} else {
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
|
||||
if ((func3 == 5) && (op != L_INST) && (op != FL)) {
|
||||
// std::cout << "func7: " << func7 << "\n";
|
||||
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
|
||||
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
|
||||
} else {
|
||||
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
|
||||
}
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::S_TYPE:
|
||||
// std::cout << "************STORE\n";
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
if (op == Opcode::FS) {
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
|
||||
dest_bits = (code >> shift_rd_) & reg_mask_;
|
||||
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
|
||||
imeed = (imm_bits << reg_s_) | dest_bits;
|
||||
// std::cout << "ENC: store imeed: " << imeed << "\n";
|
||||
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::B_TYPE:
|
||||
|
@ -184,51 +187,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
|||
bit_12 = imm_bits >> 6;
|
||||
|
||||
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
|
||||
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::U_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::J_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
|
||||
// [20 | 10:1 | 11 | 19:12]
|
||||
|
||||
unordered = code >> shift_j_u_immed_;
|
||||
|
||||
bits_19_12 = unordered & 0xff;
|
||||
bit_11 = (unordered >> 8) & 0x1;
|
||||
bits_10_1 = (unordered >> 9) & 0x3ff;
|
||||
bit_20 = (unordered >> 19) & 0x1;
|
||||
|
||||
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
|
||||
if (bit_20) {
|
||||
imeed |= ~j_imm_mask_;
|
||||
}
|
||||
|
||||
instr->setSrcImm(imeed);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::V_TYPE:
|
||||
D(3, "Entered here: instr type = vector" << op);
|
||||
switch (op) {
|
||||
case Opcode::VSET_ARITH: //TODO: arithmetic ops
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
D(3, "Entered here: instr type = vector");
|
||||
|
@ -247,53 +233,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
|||
instr->setVsew((immed >> 2) & 0x3);
|
||||
D(3, "sew " << ((immed >> 2) & 0x3));
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_) & 0x1);
|
||||
instr->setFunc6((code >> shift_func6_) & func6_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
}
|
||||
break;
|
||||
|
||||
case Opcode::VL:
|
||||
D(3, "vector load instr");
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->vd = ((code >> shift_rd_) & reg_mask_);
|
||||
//trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case Opcode::VS:
|
||||
instr->setVs3((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
//trace_inst->vd = ((code>>shift_rd_) & reg_mask_);
|
||||
trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -303,23 +270,47 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
|||
break;
|
||||
case R4_TYPE:
|
||||
// RT: add R4_TYPE decoder
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs3_) & reg_mask_);
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs3_) & reg_mask_);
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rs3 = ((code >> shift_rs3_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unrecognized argument class in word decoder.\n";
|
||||
std::abort();
|
||||
}
|
||||
|
||||
if (curInstType != InstType::N_TYPE) {
|
||||
trace_inst->valid = true;
|
||||
if (instr->hasRDest()) {
|
||||
if (instr->is_FpDest()) {
|
||||
trace_inst->frd = instr->getRDest();
|
||||
} else if (instr->is_VDest()) {
|
||||
trace_inst->vrd = instr->getRDest();
|
||||
} else {
|
||||
trace_inst->ird = instr->getRDest();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < instr->getNRSrc(); ++i) {
|
||||
if (instr->is_FpSrc(i)) {
|
||||
if (i == 0) trace_inst->frs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->frs2 = instr->getRSrc(i);
|
||||
else if (i == 2) trace_inst->frs3 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
} else if (instr->is_VSrc(i)) {
|
||||
if (i == 0) trace_inst->vrs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
} else {
|
||||
if (i == 0) trace_inst->irs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->irs2 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush);
|
||||
|
||||
return instr;
|
||||
|
|
1798
simX/execute.cpp
1798
simX/execute.cpp
File diff suppressed because it is too large
Load diff
|
@ -1,23 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "instr.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
void Instr::setVlmul(Word lmul) {
|
||||
vlmul_ = std::pow(2, lmul);
|
||||
}
|
||||
|
||||
void Instr::setVsew(Word sew) {
|
||||
vsew_ = std::pow(2, 3+sew);
|
||||
}
|
||||
|
||||
void Instr::setVediv(Word ediv) {
|
||||
vediv_ = std::pow(2,ediv);
|
||||
}
|
44
simX/instr.h
44
simX/instr.h
|
@ -52,9 +52,12 @@ public:
|
|||
Instr()
|
||||
: opcode_(Opcode::NOP)
|
||||
, nRsrc_(0)
|
||||
, nPsrc_(0)
|
||||
, hasImmSrc_(false)
|
||||
, hasRDest_(false)
|
||||
, is_FpDest_(false)
|
||||
, is_VDest_(false)
|
||||
, is_FpSrc_(0)
|
||||
, is_VSrc_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func7_(0)
|
||||
|
@ -65,20 +68,24 @@ public:
|
|||
|
||||
/* Setters used to "craft" the instruction. */
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(RegNum destReg) { hasRDest_ = true; rdest_ = destReg; }
|
||||
void setSrcReg(RegNum srcReg) { rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestReg(int destReg) { hasRDest_ = true; rdest_ = destReg; }
|
||||
void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; }
|
||||
void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; }
|
||||
void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
|
||||
void setFunc3(Word func3) { func3_ = func3; }
|
||||
void setFunc7(Word func7) { func7_ = func7; }
|
||||
void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; }
|
||||
void setVsetImm(Word vset_imm) { if(vset_imm) vsetImm_ = true; else vsetImm_ = false; }
|
||||
void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; }
|
||||
void setVlsWidth(Word width) { vlsWidth_ = width; }
|
||||
void setVmop(Word mop) { vMop_ = mop; }
|
||||
void setVnf(Word nf) { vNf_ = nf; }
|
||||
void setVmask(Word mask) { vmask_ = mask; }
|
||||
void setVs3(Word vs) { vs3_ = vs; }
|
||||
void setVlmul(Word lmul);
|
||||
void setVsew(Word sew);
|
||||
void setVediv(Word ediv);
|
||||
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
|
||||
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
|
||||
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(Word func6) { func6_ = func6; }
|
||||
|
||||
/* Getters used by encoders. */
|
||||
|
@ -86,10 +93,10 @@ public:
|
|||
Word getFunc3() const { return func3_; }
|
||||
Word getFunc6() const { return func6_; }
|
||||
Word getFunc7() const { return func7_; }
|
||||
RegNum getNRSrc() const { return nRsrc_; }
|
||||
RegNum getRSrc(RegNum i) const { return rsrc_[i]; }
|
||||
int getNRSrc() const { return nRsrc_; }
|
||||
int getRSrc(int i) const { return rsrc_[i]; }
|
||||
bool hasRDest() const { return hasRDest_; }
|
||||
RegNum getRDest() const { return rdest_; }
|
||||
int getRDest() const { return rdest_; }
|
||||
bool hasImm() const { return hasImmSrc_; }
|
||||
Word getImm() const { return immsrc_; }
|
||||
bool getVsetImm() const { return vsetImm_; }
|
||||
|
@ -102,6 +109,12 @@ public:
|
|||
Word getVsew() const { return vsew_; }
|
||||
Word getVediv() const { return vediv_; }
|
||||
|
||||
bool is_FpDest() const { return is_FpDest_; }
|
||||
bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; }
|
||||
|
||||
bool is_VDest() const { return is_VDest_; }
|
||||
bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; }
|
||||
|
||||
private:
|
||||
|
||||
enum {
|
||||
|
@ -110,15 +123,18 @@ private:
|
|||
|
||||
Opcode opcode_;
|
||||
int nRsrc_;
|
||||
int nPsrc_;
|
||||
bool hasImmSrc_;
|
||||
bool hasRDest_;
|
||||
bool hasRDest_;
|
||||
bool is_FpDest_;
|
||||
bool is_VDest_;
|
||||
int is_FpSrc_;
|
||||
int is_VSrc_;
|
||||
Word immsrc_;
|
||||
Word func2_;
|
||||
Word func3_;
|
||||
Word func7_;
|
||||
RegNum rsrc_[MAX_REG_SOURCES];
|
||||
RegNum rdest_;
|
||||
int rsrc_[MAX_REG_SOURCES];
|
||||
int rdest_;
|
||||
|
||||
//Vector
|
||||
bool vsetImm_;
|
||||
|
|
|
@ -15,8 +15,8 @@ using namespace vortex;
|
|||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string archString("rv32i");
|
||||
int num_cores(1);
|
||||
std::string archString("rv32imf");
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
std::string imgFileName;
|
||||
|
@ -48,7 +48,7 @@ int main(int argc, char **argv) {
|
|||
ArchDef arch(archString, num_cores, num_warps, num_threads);
|
||||
|
||||
Decoder decoder(arch);
|
||||
MemoryUnit mu(4096, arch.getWordSize(), true);
|
||||
MemoryUnit mu(4096, arch.wsize(), true);
|
||||
|
||||
RAM old_ram;
|
||||
old_ram.loadHexImpl(imgFileName.c_str());
|
||||
|
@ -59,7 +59,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
std::vector<std::shared_ptr<Core>> cores(num_cores);
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu);
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "core.h"
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string archString("rv32i");
|
||||
int num_cores(1);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
std::string imgFileName;
|
||||
bool showHelp(false);
|
||||
bool showStats(false);
|
||||
|
||||
/* Read the command line arguments. */
|
||||
CommandLineArgFlag fh("-h", "--help", "", showHelp);
|
||||
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archString);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
|
||||
CommandLineArgFlag fs("-s", "--stats", "", showStats);
|
||||
|
||||
CommandLineArg::readArgs(argc - 1, argv + 1);
|
||||
|
||||
if (showHelp || imgFileName.empty()) {
|
||||
std::cout << "Vortex emulator command line arguments:\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -a, --arch <arch string> Architecture string\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
ArchDef arch(archString, num_cores, num_warps, num_threads);
|
||||
|
||||
Decoder decoder(arch);
|
||||
MemoryUnit mu(4096, arch.getWordSize(), true);
|
||||
|
||||
RAM old_ram;
|
||||
old_ram.loadHexImpl(imgFileName.c_str());
|
||||
mu.attach(old_ram, 0);
|
||||
|
||||
struct stat hello;
|
||||
fstat(0, &hello);
|
||||
|
||||
std::vector<std::shared_ptr<Core>> cores(num_cores);
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu);
|
||||
}
|
||||
|
||||
bool running;
|
||||
|
||||
do {
|
||||
running = false;
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
if (!cores[i]->running())
|
||||
continue;
|
||||
running = true;
|
||||
cores[i]->step();
|
||||
}
|
||||
} while (running);
|
||||
|
||||
return 0;
|
||||
}
|
25
simX/trace.h
25
simX/trace.h
|
@ -5,22 +5,27 @@ namespace vortex {
|
|||
|
||||
struct trace_inst_t {
|
||||
// Warp step
|
||||
bool valid_inst;
|
||||
unsigned pc;
|
||||
bool valid;
|
||||
unsigned PC;
|
||||
|
||||
// Core scheduler
|
||||
int wid;
|
||||
|
||||
// Encoder
|
||||
int rs1;
|
||||
int rs2;
|
||||
int rs3;
|
||||
int rd;
|
||||
int irs1;
|
||||
int irs2;
|
||||
int ird;
|
||||
|
||||
//Encoder
|
||||
int vs1;
|
||||
int vs2;
|
||||
int vd;
|
||||
// Floating-point
|
||||
int frs1;
|
||||
int frs2;
|
||||
int frs3;
|
||||
int frd;
|
||||
|
||||
// Vector extension
|
||||
int vrs1;
|
||||
int vrs2;
|
||||
int vrd;
|
||||
|
||||
// Instruction execute
|
||||
bool is_lw;
|
||||
|
|
10
simX/types.h
10
simX/types.h
|
@ -1,20 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <bitset>
|
||||
#include <VX_config.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
typedef uint8_t Byte;
|
||||
typedef uint32_t Word;
|
||||
typedef uint32_t Word_u;
|
||||
typedef int32_t Word_s;
|
||||
|
||||
typedef Word_u Addr;
|
||||
typedef Word_u Size;
|
||||
typedef uint32_t Addr;
|
||||
typedef uint32_t Size;
|
||||
|
||||
typedef unsigned RegNum;
|
||||
typedef unsigned ThdNum;
|
||||
typedef std::bitset<32> ThreadMask;
|
||||
|
||||
enum MemFlags {
|
||||
RD_USR = 1,
|
||||
|
|
|
@ -12,15 +12,15 @@ Word vortex::signExt(Word w, Size bit, Word mask) {
|
|||
return w;
|
||||
}
|
||||
|
||||
void vortex::wordToBytes(Byte *b, Word_u w, Size wordSize) {
|
||||
void vortex::wordToBytes(Byte *b, Word w, Size wordSize) {
|
||||
while (wordSize--) {
|
||||
*(b++) = w & 0xff;
|
||||
w >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
|
||||
Word_u w = 0;
|
||||
Word vortex::bytesToWord(const Byte *b, Size wordSize) {
|
||||
Word w = 0;
|
||||
b += wordSize-1;
|
||||
while (wordSize--) {
|
||||
w <<= 8;
|
||||
|
@ -29,15 +29,15 @@ Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
|
|||
return w;
|
||||
}
|
||||
|
||||
Word_u vortex::flagsToWord(bool r, bool w, bool x) {
|
||||
Word_u word = 0;
|
||||
Word vortex::flagsToWord(bool r, bool w, bool x) {
|
||||
Word word = 0;
|
||||
if (r) word |= RD_USR;
|
||||
if (w) word |= WR_USR;
|
||||
if (x) word |= EX_USR;
|
||||
return word;
|
||||
}
|
||||
|
||||
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word_u f) {
|
||||
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word f) {
|
||||
r = f & RD_USR;
|
||||
w = f & WR_USR;
|
||||
x = f & EX_USR;
|
||||
|
@ -49,10 +49,10 @@ Byte vortex::readByte(const std::vector<Byte> &b, Size &n) {
|
|||
return b[n++];
|
||||
}
|
||||
|
||||
Word_u vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
|
||||
Word vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
|
||||
if (b.size() - n < wordSize)
|
||||
throw std::out_of_range("out of range");
|
||||
Word_u w(0);
|
||||
Word w(0);
|
||||
n += wordSize;
|
||||
// std::cout << "wordSize: " << wordSize << "\n";
|
||||
for (Size i = 0; i < wordSize; i++) {
|
||||
|
|
10
simX/util.h
10
simX/util.h
|
@ -12,13 +12,13 @@ void unused(Args&&...) {}
|
|||
|
||||
Word signExt(Word w, Size bit, Word mask);
|
||||
|
||||
Word_u bytesToWord(const Byte *b, Size wordSize);
|
||||
void wordToBytes(Byte *b, Word_u w, Size wordSize);
|
||||
Word_u flagsToWord(bool r, bool w, bool x);
|
||||
void wordToFlags(bool &r, bool &w, bool &x, Word_u f);
|
||||
Word bytesToWord(const Byte *b, Size wordSize);
|
||||
void wordToBytes(Byte *b, Word w, Size wordSize);
|
||||
Word flagsToWord(bool r, bool w, bool x);
|
||||
void wordToFlags(bool &r, bool &w, bool &x, Word f);
|
||||
|
||||
Byte readByte(const std::vector<Byte> &b, Size &n);
|
||||
Word_u readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
|
||||
Word readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
|
||||
void writeByte(std::vector<Byte> &p, Size &n, Byte b);
|
||||
void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w);
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "instr.h"
|
||||
|
@ -11,87 +12,67 @@ using namespace vortex;
|
|||
|
||||
Warp::Warp(Core *core, Word id)
|
||||
: id_(id)
|
||||
, active_(false)
|
||||
, core_(core)
|
||||
, pc_(0x80000000)
|
||||
, shadowPc_(0)
|
||||
, activeThreads_(0)
|
||||
, shadowActiveThreads_(0)
|
||||
, shadowIReg_(core_->arch().getNumRegs())
|
||||
, VLEN_(1024)
|
||||
, spawned_(false)
|
||||
, PC_(0x80000000)
|
||||
, steps_(0)
|
||||
, insts_(0)
|
||||
, loads_(0)
|
||||
, stores_(0) {
|
||||
D(3, "Creating a new thread with PC: " << std::hex << pc_);
|
||||
/* Build the register file. */
|
||||
Word regNum(0);
|
||||
for (Word j = 0; j < core_->arch().getNumThreads(); ++j) {
|
||||
iRegFile_.push_back(std::vector<Reg<Word>>(0));
|
||||
for (Word i = 0; i < core_->arch().getNumRegs(); ++i) {
|
||||
iRegFile_[j].push_back(Reg<Word>(id, regNum++));
|
||||
}
|
||||
|
||||
bool act = false;
|
||||
if (j == 0)
|
||||
act = true;
|
||||
tmask_.push_back(act);
|
||||
shadowTmask_.push_back(act);
|
||||
}
|
||||
tmask_.reset();
|
||||
|
||||
for (Word i = 0; i < (1 << 12); i++) {
|
||||
csrs_.push_back(Reg<uint32_t>(id, regNum++));
|
||||
}
|
||||
|
||||
/* Set initial register contents. */
|
||||
iRegFile_[0][0] = (core_->arch().getNumThreads() << (core_->arch().getWordSize() * 8 / 2)) | id;
|
||||
iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
|
||||
csrs_.resize(core_->arch().num_csrs());
|
||||
}
|
||||
|
||||
void Warp::step(trace_inst_t *trace_inst) {
|
||||
assert(tmask_.any());
|
||||
|
||||
Size fetchPos(0);
|
||||
Size decPos;
|
||||
Size wordSize(core_->arch().getWordSize());
|
||||
Size wordSize(core_->arch().wsize());
|
||||
std::vector<Byte> fetchBuffer(wordSize);
|
||||
|
||||
if (activeThreads_ == 0)
|
||||
return;
|
||||
|
||||
++steps_;
|
||||
|
||||
D(3, "current PC=0x" << std::hex << pc_);
|
||||
D(3, "current PC=0x" << std::hex << PC_);
|
||||
|
||||
// std::cout << "pc: " << std::hex << pc << "\n";
|
||||
trace_inst->pc = pc_;
|
||||
// std::cout << "PC: " << std::hex << PC << "\n";
|
||||
trace_inst->PC = PC_;
|
||||
|
||||
/* Fetch and decode. */
|
||||
if (wordSize < sizeof(pc_))
|
||||
pc_ &= ((1ll << (wordSize * 8)) - 1);
|
||||
if (wordSize < sizeof(PC_))
|
||||
PC_ &= ((1ll << (wordSize * 8)) - 1);
|
||||
|
||||
unsigned fetchSize = 4;
|
||||
fetchBuffer.resize(fetchSize);
|
||||
Word fetched = core_->mem().fetch(pc_ + fetchPos, 0);
|
||||
Word fetched = core_->mem().fetch(PC_ + fetchPos, 0);
|
||||
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
|
||||
|
||||
decPos = 0;
|
||||
std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst);
|
||||
|
||||
// Update pc
|
||||
pc_ += decPos;
|
||||
// Update PC
|
||||
PC_ += decPos;
|
||||
|
||||
// Execute
|
||||
this->execute(*instr, trace_inst);
|
||||
|
||||
// At Debug Level 3, print debug info after each instruction.
|
||||
D(3, "Register state:");
|
||||
for (unsigned i = 0; i < iRegFile_[0].size(); ++i) {
|
||||
D_RAW(" %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (unsigned j = 0; j < (activeThreads_); ++j)
|
||||
D_RAW(' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
|
||||
D_RAW('(' << shadowIReg_[i] << ')' << std::endl);
|
||||
D(4, "Register state:");
|
||||
for (int i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (int j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
}
|
||||
|
||||
DPH(3, "Thread mask:");
|
||||
for (unsigned i = 0; i < tmask_.size(); ++i)
|
||||
for (int i = 0; i < core_->arch().num_threads(); ++i)
|
||||
DPN(3, " " << tmask_[i]);
|
||||
DPN(3, "\n");
|
||||
}
|
||||
|
|
148
simX/warp.h
148
simX/warp.h
|
@ -7,69 +7,25 @@
|
|||
|
||||
namespace vortex {
|
||||
|
||||
template <typename T>
|
||||
class Reg {
|
||||
public:
|
||||
Reg()
|
||||
: value_(0), cpuId_(0), regNum_(0) {}
|
||||
Reg(Word c, Word n)
|
||||
: value_(0), cpuId_(c), regNum_(n) {}
|
||||
Reg(Word c, Word n, T v)
|
||||
: value_(v), cpuId_(c), regNum_(n) {}
|
||||
|
||||
const T &value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
Reg &operator=(T r) {
|
||||
if (regNum_) {
|
||||
value_ = r;
|
||||
doWrite();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator T() const {
|
||||
doRead();
|
||||
return value_;
|
||||
}
|
||||
|
||||
void trunc(Size s) {
|
||||
Word mask((~0ull >> (sizeof(Word) - s) * 8));
|
||||
value_ &= mask;
|
||||
}
|
||||
|
||||
private:
|
||||
T value_;
|
||||
Word cpuId_, regNum_;
|
||||
|
||||
void doWrite() const {}
|
||||
void doRead() const {}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(
|
||||
unsigned p,
|
||||
const std::vector<std::vector<Reg<Word>>> &m,
|
||||
std::vector<bool> &tm,
|
||||
Word pc
|
||||
) : pc(pc)
|
||||
, fallThrough(false)
|
||||
, uni(false) {
|
||||
for (unsigned i = 0; i < m.size(); ++i) {
|
||||
tmask.push_back(!bool(m[i][p]) && tm[i]);
|
||||
}
|
||||
}
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
, PC(PC)
|
||||
, fallThrough(false)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
DomStackEntry(const std::vector<bool> &tmask)
|
||||
: tmask(tmask), fallThrough(true), uni(false) {}
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, PC(0)
|
||||
, fallThrough(true)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
std::vector<bool> tmask;
|
||||
Word pc;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
bool fallThrough;
|
||||
bool uni;
|
||||
bool unanimous;
|
||||
};
|
||||
|
||||
struct vtype {
|
||||
|
@ -86,11 +42,13 @@ class trace_inst_t;
|
|||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, Word id = 0);
|
||||
|
||||
void step(trace_inst_t *);
|
||||
|
||||
bool running() const {
|
||||
return (activeThreads_ != 0);
|
||||
bool active() const {
|
||||
return tmask_.any();
|
||||
}
|
||||
|
||||
std::size_t getActiveThreads() const {
|
||||
return tmask_.count();
|
||||
}
|
||||
|
||||
void printStats() const;
|
||||
|
@ -103,68 +61,40 @@ public:
|
|||
return id_;
|
||||
}
|
||||
|
||||
Word get_pc() const {
|
||||
return pc_;
|
||||
Word getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void set_pc(Word pc) {
|
||||
pc_ = pc;
|
||||
}
|
||||
|
||||
void setActiveThreads(Size activeThreads) {
|
||||
activeThreads_ = activeThreads;
|
||||
}
|
||||
|
||||
Size getActiveThreads() const {
|
||||
return activeThreads_;
|
||||
}
|
||||
|
||||
void setSpawned(bool spawned) {
|
||||
spawned_ = spawned;
|
||||
void setPC(Word PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_[index] = value;
|
||||
}
|
||||
|
||||
void step(trace_inst_t *);
|
||||
|
||||
private:
|
||||
|
||||
void execute(Instr &instr, trace_inst_t *);
|
||||
|
||||
struct MemAccess {
|
||||
MemAccess(bool w, Word a)
|
||||
: wr(w), addr(a) {}
|
||||
bool wr;
|
||||
Word addr;
|
||||
};
|
||||
|
||||
std::vector<MemAccess> memAccesses_;
|
||||
|
||||
Word id_;
|
||||
bool active_;
|
||||
Core *core_;
|
||||
Word pc_;
|
||||
Word shadowPc_;
|
||||
Size activeThreads_;
|
||||
Size shadowActiveThreads_;
|
||||
std::vector<std::vector<Reg<Word>>> iRegFile_;
|
||||
std::vector<std::vector<Reg<Word>>> fRegFile_;
|
||||
std::vector<Reg<uint32_t>> csrs_;
|
||||
|
||||
std::vector<bool> tmask_;
|
||||
std::vector<bool> shadowTmask_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> iRegFile_;
|
||||
std::vector<std::vector<Word>> fRegFile_;
|
||||
std::vector<std::vector<Byte>> vRegFile_;
|
||||
std::vector<Word> csrs_;
|
||||
std::stack<DomStackEntry> domStack_;
|
||||
|
||||
std::vector<Word> shadowIReg_;
|
||||
std::vector<Word> shadowFReg_;
|
||||
|
||||
struct vtype vtype_; // both of them are XLEN WIDE
|
||||
int vl_; // both of them are XLEN WIDE
|
||||
Word VLEN_; // total vector length
|
||||
|
||||
std::vector<std::vector<Reg<char *>>> vregFile_; // 32 vector registers
|
||||
|
||||
bool spawned_;
|
||||
|
||||
struct vtype vtype_;
|
||||
int vl_;
|
||||
|
||||
unsigned long steps_;
|
||||
unsigned long insts_;
|
||||
unsigned long loads_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue