tabs cleanup

This commit is contained in:
Blaise Tine 2024-03-13 23:19:54 -07:00
parent a8f2bb30da
commit 07c063031f
11 changed files with 1112 additions and 1112 deletions

View file

@ -19,88 +19,88 @@ namespace vortex {
// NOTE: this text is a rendered diff of a commit titled "tabs cleanup". Every
// changed run of lines is listed twice: the pre-commit copy followed by the
// post-commit copy, which carries the same tokens.
//
// CacheCluster exposes a num_units x num_requests grid of core-facing request
// and response ports, routes them through two layers of MemSwitch arbiters
// into a set of CacheSim instances, and funnels the caches' memory-side ports
// through one more arbiter into a single MemReqPort / MemRspPort pair.
class CacheCluster : public SimObject<CacheCluster> {
public:
// Core-facing ports, indexed [unit][request]; single memory-facing port pair.
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
// Builds the whole port/arbiter/cache topology.
//   ctx, name    - forwarded to SimObject; name also prefixes child names.
//   num_units    - first dimension of the core port grid.
//   num_caches   - number of CacheSim instances; 0 is accepted (see below).
//   num_requests - second dimension of the core port grid.
//   config       - CacheSim configuration shared by every cache instance.
// caches_ is sized MAX(num_caches, 1) so that at least one slot exists.
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_units,
uint32_t num_caches,
uint32_t num_requests,
const CacheSim::Config& config)
: SimObject(ctx, name)
, CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
, CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
, MemReqPort(this)
, MemRspPort(this)
, caches_(MAX(num_caches, 0x1)) {
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_units,
uint32_t num_caches,
uint32_t num_requests,
const CacheSim::Config& config)
: SimObject(ctx, name)
, CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
, CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
, MemReqPort(this)
, MemRspPort(this)
, caches_(MAX(num_caches, 0x1)) {
// num_caches == 0 is turned into one cache created with bypass = true. Only
// the local copy (config2) is modified; the caller's config is untouched.
CacheSim::Config config2(config);
if (0 == num_caches) {
num_caches = 1;
config2.bypass = true;
}
CacheSim::Config config2(config);
if (0 == num_caches) {
num_caches = 1;
config2.bypass = true;
}
// Layer 1: one arbiter per unit. Its ReqIn/RspIn ports are indexed by request
// (0..num_requests-1) and bound to that unit's core ports.
// NOTE(review): u is uint32_t but is formatted with "%d"; "%u" would match the
// argument type.
char sname[100];
std::vector<MemSwitch::Ptr> unit_arbs(num_units);
for (uint32_t u = 0; u < num_units; ++u) {
snprintf(sname, 100, "%s-unit-arb-%d", name, u);
unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
for (uint32_t i = 0; i < num_requests; ++i) {
this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
}
}
char sname[100];
std::vector<MemSwitch::Ptr> unit_arbs(num_units);
for (uint32_t u = 0; u < num_units; ++u) {
snprintf(sname, 100, "%s-unit-arb-%d", name, u);
unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
for (uint32_t i = 0; i < num_requests; ++i) {
this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
}
}
// Layer 2: one arbiter per cache input (config.num_inputs of them). Arbiter i
// takes output i of every unit arbiter on its ReqIn side, indexed by unit.
std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
for (uint32_t i = 0; i < config.num_inputs; ++i) {
snprintf(sname, 100, "%s-mem-arb-%d", name, i);
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
for (uint32_t u = 0; u < num_units; ++u) {
unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
}
}
std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
for (uint32_t i = 0; i < config.num_inputs; ++i) {
snprintf(sname, 100, "%s-mem-arb-%d", name, i);
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
for (uint32_t u = 0; u < num_units; ++u) {
unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
}
}
// Memory-side arbiter: num_caches inputs, one output.
snprintf(sname, 100, "%s-cache-arb", name);
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
snprintf(sname, 100, "%s-cache-arb", name);
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
// Create each cache from config2 (the copy that may have bypass forced on).
for (uint32_t i = 0; i < num_caches; ++i) {
snprintf(sname, 100, "%s-cache%d", name, i);
caches_.at(i) = CacheSim::Create(sname, config2);
for (uint32_t i = 0; i < num_caches; ++i) {
snprintf(sname, 100, "%s-cache%d", name, i);
caches_.at(i) = CacheSim::Create(sname, config2);
// Cache i, input j is fed by output i of layer-2 arbiter j.
for (uint32_t j = 0; j < config.num_inputs; ++j) {
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
}
for (uint32_t j = 0; j < config.num_inputs; ++j) {
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
}
// Cache i's memory-side ports go to input i of the memory-side arbiter.
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
}
caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
}
// The arbiter's single output is the cluster's own memory port pair.
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
}
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
}
// Destructor, reset() and tick() are intentionally empty in this class.
~CacheCluster() {}
~CacheCluster() {}
void reset() {}
void tick() {}
void reset() {}
void tick() {}
// Returns the sum (via PerfStats::operator+=) of the counters of every cache
// held in caches_.
CacheSim::PerfStats perf_stats() const {
CacheSim::PerfStats perf;
for (auto cache : caches_) {
perf += cache->perf_stats();
}
return perf;
}
CacheSim::PerfStats perf_stats() const {
CacheSim::PerfStats perf;
for (auto cache : caches_) {
perf += cache->perf_stats();
}
return perf;
}
private:
// Cache instances created by the constructor; size is MAX(num_caches, 1).
std::vector<CacheSim::Ptr> caches_;
std::vector<CacheSim::Ptr> caches_;
};
}

File diff suppressed because it is too large Load diff

View file

@ -20,76 +20,76 @@ namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Public interface of the cache simulator object: configuration record,
// performance counters, core-side and memory-side ports, and the
// reset/tick/perf_stats entry points. The behaviour lives in the private Impl
// class, which is only forward-declared here.
class CacheSim : public SimObject<CacheSim> {
public:
// Cache parameters. Sizes are stored as log2 values, per the field comments.
// The field name write_reponse is spelled this way in the source; it is part
// of the interface and must be used verbatim.
struct Config {
bool bypass; // cache bypass
uint8_t C; // log2 cache size
uint8_t L; // log2 line size
uint8_t W; // log2 word size
uint8_t A; // log2 associativity
uint8_t B; // log2 number of banks
uint8_t addr_width; // word address bits
uint8_t ports_per_bank; // number of ports per bank
uint8_t num_inputs; // number of inputs
bool write_through; // is write-through
bool write_reponse; // enable write response
uint16_t mshr_size; // MSHR buffer size
uint8_t latency; // pipeline latency
};
// Event counters. The default constructor zeroes every field and operator+=
// adds the fields of another PerfStats member by member.
struct PerfStats {
uint64_t reads;
uint64_t writes;
uint64_t read_misses;
uint64_t write_misses;
uint64_t evictions;
uint64_t pipeline_stalls;
uint64_t bank_stalls;
uint64_t mshr_stalls;
uint64_t mem_latency;
struct Config {
bool bypass; // cache bypass
uint8_t C; // log2 cache size
uint8_t L; // log2 line size
uint8_t W; // log2 word size
uint8_t A; // log2 associativity
uint8_t B; // log2 number of banks
uint8_t addr_width; // word address bits
uint8_t ports_per_bank; // number of ports per bank
uint8_t num_inputs; // number of inputs
bool write_through; // is write-through
bool write_reponse; // enable write response
uint16_t mshr_size; // MSHR buffer size
uint8_t latency; // pipeline latency
};
struct PerfStats {
uint64_t reads;
uint64_t writes;
uint64_t read_misses;
uint64_t write_misses;
uint64_t evictions;
uint64_t pipeline_stalls;
uint64_t bank_stalls;
uint64_t mshr_stalls;
uint64_t mem_latency;
// All counters start at zero.
PerfStats()
: reads(0)
, writes(0)
, read_misses(0)
, write_misses(0)
, evictions(0)
, pipeline_stalls(0)
, bank_stalls(0)
, mshr_stalls(0)
, mem_latency(0)
{}
PerfStats()
: reads(0)
, writes(0)
, read_misses(0)
, write_misses(0)
, evictions(0)
, pipeline_stalls(0)
, bank_stalls(0)
, mshr_stalls(0)
, mem_latency(0)
{}
// Member-wise accumulation; returns *this so calls can be chained.
PerfStats& operator+=(const PerfStats& rhs) {
this->reads += rhs.reads;
this->writes += rhs.writes;
this->read_misses += rhs.read_misses;
this->write_misses += rhs.write_misses;
this->evictions += rhs.evictions;
this->pipeline_stalls += rhs.pipeline_stalls;
this->bank_stalls += rhs.bank_stalls;
this->mshr_stalls += rhs.mshr_stalls;
this->mem_latency += rhs.mem_latency;
return *this;
}
};
PerfStats& operator+=(const PerfStats& rhs) {
this->reads += rhs.reads;
this->writes += rhs.writes;
this->read_misses += rhs.read_misses;
this->write_misses += rhs.write_misses;
this->evictions += rhs.evictions;
this->pipeline_stalls += rhs.pipeline_stalls;
this->bank_stalls += rhs.bank_stalls;
this->mshr_stalls += rhs.mshr_stalls;
this->mem_latency += rhs.mem_latency;
return *this;
}
};
// Core-side request/response port vectors and the memory-side port pair.
// Presumably the vectors hold Config::num_inputs entries each; the sizing is
// done in the out-of-view constructor definition - TODO confirm.
std::vector<SimPort<MemReq>> CoreReqPorts;
std::vector<SimPort<MemRsp>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> CoreReqPorts;
std::vector<SimPort<MemRsp>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
// Construction/destruction and the simulation hooks are declared only; their
// definitions are not part of this hunk.
CacheSim(const SimContext& ctx, const char* name, const Config& config);
~CacheSim();
CacheSim(const SimContext& ctx, const char* name, const Config& config);
~CacheSim();
void reset();
void tick();
void reset();
void tick();
// Read-only access to the accumulated counters.
const PerfStats& perf_stats() const;
const PerfStats& perf_stats() const;
private:
// Raw pointer to the hidden implementation object.
class Impl;
Impl* impl_;
class Impl;
Impl* impl_;
};
}

View file

@ -21,25 +21,25 @@ namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Fixed-size table of 32-bit values, indexed by whatever VX_DCR_BASE_STATE
// maps an address to. The macro is defined outside this view.
class BaseDCRS {
public:
// Returns the stored value for addr. std::array::at throws std::out_of_range
// when the mapped index is not below VX_DCR_BASE_STATE_COUNT.
uint32_t read(uint32_t addr) const {
uint32_t state = VX_DCR_BASE_STATE(addr);
return states_.at(state);
}
uint32_t read(uint32_t addr) const {
uint32_t state = VX_DCR_BASE_STATE(addr);
return states_.at(state);
}
// Stores value in the slot addr maps to; same bounds check as read().
void write(uint32_t addr, uint32_t value) {
uint32_t state = VX_DCR_BASE_STATE(addr);
states_.at(state) = value;
}
void write(uint32_t addr, uint32_t value) {
uint32_t state = VX_DCR_BASE_STATE(addr);
states_.at(state) = value;
}
private:
// NOTE(review): no constructor or initializer is visible here, so a
// default-initialized BaseDCRS leaves these elements uninitialized until they
// are written - confirm that callers always write before reading.
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
private:
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
};
// Aggregates the register groups. Only write() is declared here; its
// definition is outside this view, and base_dcrs is a public member.
// (The two members are listed twice by the diff view: old and new copy.)
class DCRS {
public:
void write(uint32_t addr, uint32_t value);
BaseDCRS base_dcrs;
void write(uint32_t addr, uint32_t value);
BaseDCRS base_dcrs;
};
}

View file

@ -21,120 +21,120 @@ namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Dispatcher buffers instruction traces per issue slot and forwards them to
// the Outputs ports one block of issue slots at a time. When a warp has more
// threads than lanes (pid_count_ != 1) a trace is split into several packets,
// each covering num_lanes_ consecutive threads.
class Dispatcher : public SimObject<Dispatcher> {
public:
// One output port per issue slot.
std::vector<SimPort<instr_trace_t*>> Outputs;
std::vector<SimPort<instr_trace_t*>> Outputs;
// buf_size   - maximum entries per per-slot queue (enforced by push()).
// block_size - number of issue slots served together in one batch.
// num_lanes  - threads covered by one dispatched packet.
// batch_count_ and pid_count_ are computed by integer division, so
// block_size and num_lanes must be non-zero.
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
: SimObject<Dispatcher>(ctx, "Dispatcher")
, Outputs(ISSUE_WIDTH, this)
, Inputs_(ISSUE_WIDTH, this)
, arch_(arch)
, queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
, buf_size_(buf_size)
, block_size_(block_size)
, num_lanes_(num_lanes)
, batch_count_(ISSUE_WIDTH / block_size)
, pid_count_(arch.num_threads() / num_lanes)
, batch_idx_(0)
, start_p_(block_size, 0)
{}
virtual ~Dispatcher() {}
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
: SimObject<Dispatcher>(ctx, "Dispatcher")
, Outputs(ISSUE_WIDTH, this)
, Inputs_(ISSUE_WIDTH, this)
, arch_(arch)
, queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
, buf_size_(buf_size)
, block_size_(block_size)
, num_lanes_(num_lanes)
, batch_count_(ISSUE_WIDTH / block_size)
, pid_count_(arch.num_threads() / num_lanes)
, batch_idx_(0)
, start_p_(block_size, 0)
{}
virtual ~Dispatcher() {}
// Restarts at batch 0 with every per-slot packet cursor at 0. The queues and
// the Inputs_ ports are not cleared here.
virtual void reset() {
batch_idx_ = 0;
for (uint32_t b = 0; b < block_size_; ++b) {
start_p_.at(b) = 0;
}
}
virtual void reset() {
batch_idx_ = 0;
for (uint32_t b = 0; b < block_size_; ++b) {
start_p_.at(b) = 0;
}
}
// One simulation step.
// Phase 1: move at most one queued trace per issue slot into Inputs_ (pushed
// with a delay argument of 1).
virtual void tick() {
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
auto& queue = queues_.at(i);
if (queue.empty())
continue;
auto trace = queue.front();
Inputs_.at(i).push(trace, 1);
queue.pop();
}
virtual void tick() {
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
auto& queue = queues_.at(i);
if (queue.empty())
continue;
auto trace = queue.front();
Inputs_.at(i).push(trace, 1);
queue.pop();
}
// Phase 2: serve the block_size_ slots of the current batch. block_sent
// counts slots that are finished for this batch: empty input, trace fully
// dispatched, or already marked done with start_p == -1.
uint32_t block_sent = 0;
for (uint32_t b = 0; b < block_size_; ++b) {
uint32_t i = batch_idx_ * block_size_ + b;
auto& input = Inputs_.at(i);
if (input.empty()) {
++block_sent;
continue;
}
auto& output = Outputs.at(i);
auto trace = input.front();
auto new_trace = trace;
if (pid_count_ != 1) {
// Split path. start_p is the packet index to resume scanning from; -1 means
// this slot already sent its last packet and waits for the batch to advance.
auto start_p = start_p_.at(b);
if (start_p == -1) {
++block_sent;
continue;
}
// Find the first and last active thread at or after the resume point.
int start(-1), end(-1);
for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
if (!trace->tmask.test(j))
continue;
if (start == -1)
start = j;
end = j;
}
// Convert thread indices to packet indices.
// NOTE(review): num_lanes_ is uint32_t, so if no thread was active (start
// still -1) the division happens in unsigned arithmetic on a converted -1 -
// confirm callers never dispatch a trace with an empty remaining mask.
start /= num_lanes_;
end /= num_lanes_;
if (start != end) {
// More packets follow: send a heap-allocated copy with eop cleared and keep
// the original trace at the head of the input port.
// NOTE(review): who frees this copy is not visible in this file.
new_trace = new instr_trace_t(*trace);
new_trace->eop = false;
start_p_.at(b) = start + 1;
} else {
// Last packet: send the original trace itself and mark the slot done.
start_p_.at(b) = -1;
input.pop();
++block_sent;
}
new_trace->pid = start;
new_trace->sop = (0 == start_p);
// Keep only the thread-mask bits of the selected packet.
ThreadMask tmask;
for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
tmask[j] = trace->tmask[j];
}
new_trace->tmask = tmask;
} else {
// No splitting needed: the whole trace is packet 0.
new_trace->pid = 0;
input.pop();
++block_sent;
}
DT(3, "pipeline-dispatch: " << *new_trace);
output.push(new_trace, 1);
}
// Once every slot of the batch is finished, advance round-robin to the next
// batch and rewind all packet cursors.
if (block_sent == block_size_) {
batch_idx_ = (batch_idx_ + 1) % batch_count_;
for (uint32_t b = 0; b < block_size_; ++b) {
start_p_.at(b) = 0;
}
}
};
uint32_t block_sent = 0;
for (uint32_t b = 0; b < block_size_; ++b) {
uint32_t i = batch_idx_ * block_size_ + b;
auto& input = Inputs_.at(i);
if (input.empty()) {
++block_sent;
continue;
}
auto& output = Outputs.at(i);
auto trace = input.front();
auto new_trace = trace;
if (pid_count_ != 1) {
auto start_p = start_p_.at(b);
if (start_p == -1) {
++block_sent;
continue;
}
int start(-1), end(-1);
for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
if (!trace->tmask.test(j))
continue;
if (start == -1)
start = j;
end = j;
}
start /= num_lanes_;
end /= num_lanes_;
if (start != end) {
new_trace = new instr_trace_t(*trace);
new_trace->eop = false;
start_p_.at(b) = start + 1;
} else {
start_p_.at(b) = -1;
input.pop();
++block_sent;
}
new_trace->pid = start;
new_trace->sop = (0 == start_p);
ThreadMask tmask;
for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
tmask[j] = trace->tmask[j];
}
new_trace->tmask = tmask;
} else {
new_trace->pid = 0;
input.pop();
++block_sent;
}
DT(3, "pipeline-dispatch: " << *new_trace);
output.push(new_trace, 1);
}
if (block_sent == block_size_) {
batch_idx_ = (batch_idx_ + 1) % batch_count_;
for (uint32_t b = 0; b < block_size_; ++b) {
start_p_.at(b) = 0;
}
}
};
// Enqueues trace for the given issue slot. Returns false, without enqueuing,
// when that slot's queue already holds buf_size_ entries.
bool push(uint32_t issue_index, instr_trace_t* trace) {
auto& queue = queues_.at(issue_index);
if (queue.size() >= buf_size_)
return false;
queue.push(trace);
return true;
}
bool push(uint32_t issue_index, instr_trace_t* trace) {
auto& queue = queues_.at(issue_index);
if (queue.size() >= buf_size_)
return false;
queue.push(trace);
return true;
}
private:
// Inputs_      - internal ports fed from queues_ in tick() phase 1.
// arch_        - held by reference; must outlive the Dispatcher.
// queues_      - per-slot FIFO filled by push().
// batch_count_ - ISSUE_WIDTH / block_size.
// pid_count_   - arch.num_threads() / num_lanes; 1 disables splitting.
// batch_idx_   - batch currently being served.
// start_p_     - per-slot packet cursor; -1 marks "done for this batch".
std::vector<SimPort<instr_trace_t*>> Inputs_;
const Arch& arch_;
std::vector<std::queue<instr_trace_t*>> queues_;
uint32_t buf_size_;
uint32_t block_size_;
uint32_t num_lanes_;
uint32_t batch_count_;
uint32_t pid_count_;
uint32_t batch_idx_;
std::vector<int> start_p_;
std::vector<SimPort<instr_trace_t*>> Inputs_;
const Arch& arch_;
std::vector<std::queue<instr_trace_t*>> queues_;
uint32_t buf_size_;
uint32_t block_size_;
uint32_t num_lanes_;
uint32_t batch_count_;
uint32_t pid_count_;
uint32_t batch_idx_;
std::vector<int> start_p_;
};
}

View file

@ -19,39 +19,39 @@
namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// FIFO of instruction-trace pointers with a nominal capacity. The buffer
// stores raw pointers only; it never deletes the traces.
class IBuffer {
public:
// size is the capacity reported by full(); no storage is reserved.
IBuffer(uint32_t size)
: capacity_(size)
{}
public:
IBuffer(uint32_t size)
: capacity_(size)
{}
// True when no entry is queued.
bool empty() const {
return entries_.empty();
}
// True when exactly capacity_ entries are queued (equality comparison).
bool full() const {
return (entries_.size() == capacity_);
}
bool empty() const {
return entries_.empty();
}
bool full() const {
return (entries_.size() == capacity_);
}
// Oldest entry. Calling this on an empty buffer is undefined behaviour
// (std::queue::front precondition).
instr_trace_t* top() const {
return entries_.front();
}
instr_trace_t* top() const {
return entries_.front();
}
// Appends trace. NOTE(review): capacity is not checked here, so callers are
// expected to test full() first; pushing past capacity_ makes full() false
// again because it compares with ==.
void push(instr_trace_t* trace) {
entries_.emplace(trace);
}
void push(instr_trace_t* trace) {
entries_.emplace(trace);
}
// Drops the oldest entry; same non-empty precondition as top().
void pop() {
return entries_.pop();
}
void pop() {
return entries_.pop();
}
// Empties the buffer by swapping with a fresh queue (std::queue has no
// clear()).
void clear() {
std::queue<instr_trace_t*> empty;
std::swap(entries_, empty );
}
void clear() {
std::queue<instr_trace_t*> empty;
std::swap(entries_, empty );
}
private:
std::queue<instr_trace_t*> entries_;
uint32_t capacity_;
std::queue<instr_trace_t*> entries_;
uint32_t capacity_;
};
}

View file

@ -21,118 +21,118 @@ using namespace vortex;
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Implementation object behind LocalMem. It owns a RAM of config.capacity,
// serves functional read()/write() calls against it, and in tick() models
// bank conflicts between the requests waiting on the owner's input ports.
class LocalMem::Impl {
protected:
// simobject_ is the owning LocalMem (not owned here). bank_sel_addr_start_ and
// bank_sel_addr_end_ delimit the address bit range used to pick a bank.
LocalMem* simobject_;
Config config_;
RAM ram_;
uint32_t bank_sel_addr_start_;
uint32_t bank_sel_addr_end_;
PerfStats perf_stats_;
LocalMem* simobject_;
Config config_;
RAM ram_;
uint32_t bank_sel_addr_start_;
uint32_t bank_sel_addr_end_;
PerfStats perf_stats_;
// Reduces addr to its low line_bits bits, where line_bits is
// log2ceil(capacity / line_size). The intermediate value is a uint32_t.
// NOTE(review): the mask width is derived from the line count rather than
// from the byte capacity - confirm this is the intended address window.
uint64_t to_local_addr(uint64_t addr) {
uint32_t total_lines = config_.capacity / config_.line_size;
uint32_t line_bits = log2ceil(total_lines);
uint32_t offset = bit_getw(addr, 0, line_bits-1);
return offset;
}
uint64_t to_local_addr(uint64_t addr) {
uint32_t total_lines = config_.capacity / config_.line_size;
uint32_t line_bits = log2ceil(total_lines);
uint32_t offset = bit_getw(addr, 0, line_bits-1);
return offset;
}
public:
// Bank-select bits run from bit 0 to bit log2ceil(num_banks)-1.
// NOTE(review): bank_sel_addr_end_ is unsigned; if log2ceil(num_banks) is 0
// (presumably for a single bank) the "-1" wraps to UINT32_MAX and the
// start <= end guard in tick() stays true - TODO confirm single-bank configs.
// perf_stats_ is default-constructed here.
Impl(LocalMem* simobject, const Config& config)
: simobject_(simobject)
, config_(config)
, ram_(config.capacity)
, bank_sel_addr_start_(0)
, bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
{}
virtual ~Impl() {}
Impl(LocalMem* simobject, const Config& config)
: simobject_(simobject)
, config_(config)
, ram_(config.capacity)
, bank_sel_addr_start_(0)
, bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
{}
virtual ~Impl() {}
// Resets the performance counters only; RAM contents are left as they are.
void reset() {
perf_stats_ = PerfStats();
}
void reset() {
perf_stats_ = PerfStats();
}
// Copies size bytes from the RAM at the translated address into data.
void read(void* data, uint64_t addr, uint32_t size) {
auto s_addr = to_local_addr(addr);
DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
ram_.read(data, s_addr, size);
}
void read(void* data, uint64_t addr, uint32_t size) {
auto s_addr = to_local_addr(addr);
DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
ram_.read(data, s_addr, size);
}
// Copies size bytes from data into the RAM at the translated address.
void write(const void* data, uint64_t addr, uint32_t size) {
auto s_addr = to_local_addr(addr);
DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
ram_.write(data, s_addr, size);
}
void write(const void* data, uint64_t addr, uint32_t size) {
auto s_addr = to_local_addr(addr);
DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
ram_.write(data, s_addr, size);
}
// Timing model for one cycle. Each request port is visited in index order and
// at most one request per bank is accepted; later requests that hit a bank
// already used this cycle stay queued and count as a bank stall. No data is
// moved here - the RAM is only touched by read()/write() above.
void tick() {
std::vector<bool> in_used_banks(config_.num_banks);
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
auto& core_req_port = simobject_->Inputs.at(req_id);
if (core_req_port.empty())
continue;
void tick() {
std::vector<bool> in_used_banks(config_.num_banks);
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
auto& core_req_port = simobject_->Inputs.at(req_id);
if (core_req_port.empty())
continue;
auto& core_req = core_req_port.front();
auto& core_req = core_req_port.front();
// Bank index comes from the configured address bit range; stays 0 when the
// range is empty.
uint32_t bank_id = 0;
if (bank_sel_addr_start_ <= bank_sel_addr_end_) {
bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
}
uint32_t bank_id = 0;
if (bank_sel_addr_start_ <= bank_sel_addr_end_) {
bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
}
// bank conflict check
if (in_used_banks.at(bank_id)) {
++perf_stats_.bank_stalls;
continue;
}
// bank conflict check
if (in_used_banks.at(bank_id)) {
++perf_stats_.bank_stalls;
continue;
}
in_used_banks.at(bank_id) = true;
in_used_banks.at(bank_id) = true;
// Reads always get a response; writes only when config_.write_reponse is set.
if (!core_req.write || config_.write_reponse) {
// send response
MemRsp core_rsp{core_req.tag, core_req.cid};
simobject_->Outputs.at(req_id).push(core_rsp, 1);
}
if (!core_req.write || config_.write_reponse) {
// send response
MemRsp core_rsp{core_req.tag, core_req.cid};
simobject_->Outputs.at(req_id).push(core_rsp, 1);
}
// update perf counters
perf_stats_.reads += !core_req.write;
perf_stats_.writes += core_req.write;
// update perf counters
perf_stats_.reads += !core_req.write;
perf_stats_.writes += core_req.write;
// remove input
core_req_port.pop();
}
}
// remove input
core_req_port.pop();
}
}
// Read-only view of the counters updated in tick().
const PerfStats& perf_stats() const {
return perf_stats_;
}
const PerfStats& perf_stats() const {
return perf_stats_;
}
};
///////////////////////////////////////////////////////////////////////////////
// Creates config.num_reqs input and output ports and allocates the Impl that
// carries the behaviour. impl_ is a raw owning pointer, released in the
// destructor. (The initializer list appears twice in this diff view: old copy,
// then new copy.)
LocalMem::LocalMem(const SimContext& ctx, const char* name, const Config& config)
: SimObject<LocalMem>(ctx, name)
, Inputs(config.num_reqs, this)
, Outputs(config.num_reqs, this)
, impl_(new Impl(this, config))
: SimObject<LocalMem>(ctx, name)
, Inputs(config.num_reqs, this)
, Outputs(config.num_reqs, this)
, impl_(new Impl(this, config))
{}
// Frees the implementation object held in impl_. (The statement is listed
// twice by the diff view: old copy, then new copy.)
LocalMem::~LocalMem() {
delete impl_;
delete impl_;
}
// Forwards to Impl::reset(). (Statement listed twice by the diff view.)
void LocalMem::reset() {
impl_->reset();
impl_->reset();
}
// Forwards to Impl::read(): copies size bytes at addr into data.
// (Statement listed twice by the diff view.)
void LocalMem::read(void* data, uint64_t addr, uint32_t size) {
impl_->read(data, addr, size);
impl_->read(data, addr, size);
}
// Forwards to Impl::write(): copies size bytes from data to addr.
// (Statement listed twice by the diff view.)
void LocalMem::write(const void* data, uint64_t addr, uint32_t size) {
impl_->write(data, addr, size);
impl_->write(data, addr, size);
}
// Forwards to Impl::tick(). (Statement listed twice by the diff view.)
void LocalMem::tick() {
impl_->tick();
impl_->tick();
}
// Returns a reference to the counters kept by the Impl; the reference is only
// valid while this LocalMem is alive. (Statement listed twice by the diff view.)
const LocalMem::PerfStats& LocalMem::perf_stats() const {
return impl_->perf_stats();
return impl_->perf_stats();
}

View file

@ -32,105 +32,105 @@ using namespace vortex;
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Implementation object behind MemSim. It drives a ramulator DRAM model:
// requests taken from the owner's MemReqPort are handed to the model, and
// completed reads come back through dram_callback() onto MemRspPort.
class MemSim::Impl {
private:
// simobject_ is the owning MemSim (not owned here); dram_ is owned and is
// deleted in the destructor.
MemSim* simobject_;
Config config_;
PerfStats perf_stats_;
ramulator::Gem5Wrapper* dram_;
MemSim* simobject_;
Config config_;
PerfStats perf_stats_;
ramulator::Gem5Wrapper* dram_;
public:
// Configures the DRAM model with the fixed settings below; only the channel
// count and core count come from config. Also directs the ramulator
// statistics list to "ramulator.ddr4.log".
Impl(MemSim* simobject, const Config& config)
: simobject_(simobject)
, config_(config)
{
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(config.channels));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(config.num_cores);
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
}
Impl(MemSim* simobject, const Config& config)
: simobject_(simobject)
, config_(config)
{
ramulator::Config ram_config;
ram_config.add("standard", "DDR4");
ram_config.add("channels", std::to_string(config.channels));
ram_config.add("ranks", "1");
ram_config.add("speed", "DDR4_2400R");
ram_config.add("org", "DDR4_4Gb_x8");
ram_config.add("mapping", "defaultmapping");
ram_config.set_core_num(config.num_cores);
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
}
// Finishes the DRAM model, prints its statistics, then frees it.
~Impl() {
dram_->finish();
Stats::statlist.printall();
delete dram_;
}
~Impl() {
dram_->finish();
Stats::statlist.printall();
delete dram_;
}
// Read-only view of the request counters updated in tick().
const PerfStats& perf_stats() const {
return perf_stats_;
}
const PerfStats& perf_stats() const {
return perf_stats_;
}
// Completion callback bound into each DRAM request by tick(). Writes produce
// no response; reads push a MemRsp carrying the original tag, the request's
// core id and the uuid onto the owner's MemRspPort.
void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
if (req.type == ramulator::Request::Type::WRITE)
return;
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
simobject_->MemRspPort.push(mem_rsp, 1);
DT(3, simobject_->name() << "-" << mem_rsp);
}
void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
if (req.type == ramulator::Request::Type::WRITE)
return;
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
simobject_->MemRspPort.push(mem_rsp, 1);
DT(3, simobject_->name() << "-" << mem_rsp);
}
// Resets the counters only; the DRAM model state is not touched.
void reset() {
perf_stats_ = PerfStats();
}
void reset() {
perf_stats_ = PerfStats();
}
// One simulator cycle.
// Clocking: with MEM_CYCLE_RATIO > 0 the DRAM model is ticked once every
// MEM_CYCLE_RATIO simulator cycles; otherwise it is ticked
// (1 - MEM_CYCLE_RATIO) times per cycle, because the loop runs from
// MEM_CYCLE_RATIO up to and including 0.
// Issue: at most one request is taken from MemReqPort per call. It stays at
// the head of the port when the model refuses it (send() returns false) and
// is retried on a later tick.
void tick() {
if (MEM_CYCLE_RATIO > 0) {
auto cycle = SimPlatform::instance().cycles();
if ((cycle % MEM_CYCLE_RATIO) == 0)
dram_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
dram_->tick();
}
if (simobject_->MemReqPort.empty())
return;
auto& mem_req = simobject_->MemReqPort.front();
void tick() {
if (MEM_CYCLE_RATIO > 0) {
auto cycle = SimPlatform::instance().cycles();
if ((cycle % MEM_CYCLE_RATIO) == 0)
dram_->tick();
} else {
for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
dram_->tick();
}
if (simobject_->MemReqPort.empty())
return;
auto& mem_req = simobject_->MemReqPort.front();
// tag and uuid are copied into the bound callback, so they remain valid after
// the request is popped from the port. The unqualified placeholders::_1
// relies on a using-directive outside this view.
ramulator::Request dram_req(
mem_req.addr,
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
mem_req.cid
);
ramulator::Request dram_req(
mem_req.addr,
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
mem_req.cid
);
// Count and log the request only once the model has accepted it.
if (!dram_->send(dram_req))
return;
if (mem_req.write) {
++perf_stats_.writes;
} else {
++perf_stats_.reads;
}
DT(3, simobject_->name() << "-" << mem_req);
if (!dram_->send(dram_req))
return;
if (mem_req.write) {
++perf_stats_.writes;
} else {
++perf_stats_.reads;
}
DT(3, simobject_->name() << "-" << mem_req);
simobject_->MemReqPort.pop();
}
simobject_->MemReqPort.pop();
}
};
///////////////////////////////////////////////////////////////////////////////
// Creates the request/response ports and allocates the Impl that drives the
// DRAM model. impl_ is a raw owning pointer, released in the destructor.
// (The initializer list appears twice in this diff view: old copy, then new
// copy.)
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPort(this)
, MemRspPort(this)
, impl_(new Impl(this, config))
: SimObject<MemSim>(ctx, name)
, MemReqPort(this)
, MemRspPort(this)
, impl_(new Impl(this, config))
{}
// Frees the implementation object held in impl_. (The statement is listed
// twice by the diff view: old copy, then new copy.)
MemSim::~MemSim() {
delete impl_;
delete impl_;
}
// Forwards to Impl::reset(). (Statement listed twice by the diff view.)
void MemSim::reset() {
impl_->reset();
impl_->reset();
}
// Forwards to Impl::tick(). (Statement listed twice by the diff view.)
void MemSim::tick() {
impl_->tick();
impl_->tick();
}

View file

@ -20,36 +20,36 @@ namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Public interface of the memory simulator object: configuration record,
// read/write counters, one request port and one response port, and the
// reset/tick/perf_stats entry points. Behaviour is delegated to the private
// Impl class, which is only forward-declared here.
class MemSim : public SimObject<MemSim>{
public:
// channels  - number of DRAM channels.
// num_cores - number of cores issuing requests.
struct Config {
uint32_t channels;
uint32_t num_cores;
};
struct Config {
uint32_t channels;
uint32_t num_cores;
};
// Request counters; both start at zero.
struct PerfStats {
uint64_t reads;
uint64_t writes;
struct PerfStats {
uint64_t reads;
uint64_t writes;
PerfStats()
: reads(0)
, writes(0)
{}
};
PerfStats()
: reads(0)
, writes(0)
{}
};
// Incoming memory requests and outgoing responses.
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
MemSim(const SimContext& ctx, const char* name, const Config& config);
~MemSim();
MemSim(const SimContext& ctx, const char* name, const Config& config);
~MemSim();
void reset();
void reset();
void tick();
void tick();
// Read-only access to the accumulated counters.
const PerfStats& perf_stats() const;
const PerfStats& perf_stats() const;
private:
// Raw pointer to the hidden implementation object.
class Impl;
Impl* impl_;
class Impl;
Impl* impl_;
};
};

View file

@ -23,37 +23,37 @@ public:
SimPort<instr_trace_t*> Output;
// Registers the object under the fixed name "Operand" and creates its Input
// and Output ports. (The initializer list appears twice in this diff view:
// old copy, then new copy.)
Operand(const SimContext& ctx)
: SimObject<Operand>(ctx, "Operand")
, Input(this)
, Output(this)
: SimObject<Operand>(ctx, "Operand")
, Input(this)
, Output(this)
{}
// Virtual destructor with an empty body.
virtual ~Operand() {}
// Nothing to reset: the body is empty.
virtual void reset() {}
// Moves at most one trace per call from Input to Output. The push delay is
// 1 plus the number of register indices the trace uses in any of the integer,
// float or vector register sets; index 0 is skipped whenever it is marked as
// an integer register. (Each run of body lines appears twice in this diff
// view: old copy, then new copy.)
virtual void tick() {
if (Input.empty())
return;
auto trace = Input.front();
if (Input.empty())
return;
auto trace = Input.front();
// A register index counts once even if it is used in several register sets.
int delay = 1;
for (int i = 0; i < MAX_NUM_REGS; ++i) {
bool is_iregs = trace->used_iregs.test(i);
bool is_fregs = trace->used_fregs.test(i);
bool is_vregs = trace->used_vregs.test(i);
if (is_iregs || is_fregs || is_vregs) {
if (is_iregs && i == 0)
continue;
++delay;
}
}
int delay = 1;
for (int i = 0; i < MAX_NUM_REGS; ++i) {
bool is_iregs = trace->used_iregs.test(i);
bool is_fregs = trace->used_fregs.test(i);
bool is_vregs = trace->used_vregs.test(i);
if (is_iregs || is_fregs || is_vregs) {
if (is_iregs && i == 0)
continue;
++delay;
}
}
Output.push(trace, delay);
DT(3, "pipeline-operands: " << *trace);
Output.push(trace, delay);
DT(3, "pipeline-operands: " << *trace);
Input.pop();
Input.pop();
};
};

View file

@ -22,98 +22,98 @@ namespace vortex {
// NOTE: rendered diff of a whitespace-only commit; each changed run of lines
// is listed twice (pre-commit copy, then post-commit copy with equal tokens).
//
// Tracks, per warp, which integer and float destination registers have a
// pending write, and remembers which trace owns each pending register.
// Owners are stored as raw, non-owning pointers.
class Scoreboard {
public:
// Describes one register a trace is waiting on, with the functional-unit
// information and uuid copied from the owning trace.
struct reg_use_t {
RegType reg_type;
uint32_t reg_id;
FUType fu_type;
SfuType sfu_type;
uint64_t uuid;
};
// Allocates one integer and one float register mask per warp and clears all
// state.
Scoreboard(const Arch &arch)
: in_use_iregs_(arch.num_warps())
, in_use_fregs_(arch.num_warps())
{
this->clear();
}
struct reg_use_t {
RegType reg_type;
uint32_t reg_id;
FUType fu_type;
SfuType sfu_type;
uint64_t uuid;
};
Scoreboard(const Arch &arch)
: in_use_iregs_(arch.num_warps())
, in_use_fregs_(arch.num_warps())
{
this->clear();
}
// Marks every register of every warp as free and forgets all owners.
void clear() {
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
in_use_iregs_.at(i).reset();
in_use_fregs_.at(i).reset();
}
owners_.clear();
}
void clear() {
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
in_use_iregs_.at(i).reset();
in_use_fregs_.at(i).reset();
}
owners_.clear();
}
// True when any integer or float register used by trace is still pending for
// the trace's warp. Only used_iregs and used_fregs are consulted.
bool in_use(instr_trace_t* trace) const {
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
}
bool in_use(instr_trace_t* trace) const {
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
}
// Lists every pending register that trace uses, integer registers first and
// then float registers, each in ascending register order. owners_.at() throws
// std::out_of_range if a pending register has no recorded owner.
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
std::vector<reg_use_t> out;
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
std::vector<reg_use_t> out;
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
// Owner key layout: register id from bit 16 up, warp id from bit 4 up,
// register type in the low bits.
// NOTE(review): this is collision-free only if the type value fits in 4 bits
// and the warp id in 12 bits - confirm against the RegType and warp limits.
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
if (used_iregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
auto owner = owners_.at(tag);
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
if (used_iregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
auto owner = owners_.at(tag);
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
if (used_fregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
auto owner = owners_.at(tag);
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
if (used_fregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
auto owner = owners_.at(tag);
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
return out;
}
// Marks trace's destination register as pending and records trace as its
// owner. Preconditions are checked with assert only: the trace writes back,
// the destination is an integer or float register, the register has no
// current owner, and fu_type is below 5.
void reserve(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
in_use_iregs_.at(trace->wid).set(trace->rdest);
break;
case RegType::Float:
in_use_fregs_.at(trace->wid).set(trace->rdest);
break;
default:
assert(false);
}
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
assert(owners_.count(tag) == 0);
owners_[tag] = trace;
assert((int)trace->fu_type < 5);
}
return out;
}
void reserve(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
in_use_iregs_.at(trace->wid).set(trace->rdest);
break;
case RegType::Float:
in_use_fregs_.at(trace->wid).set(trace->rdest);
break;
default:
assert(false);
}
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
assert(owners_.count(tag) == 0);
owners_[tag] = trace;
assert((int)trace->fu_type < 5);
}
// Clears the pending bit of trace's destination register and drops its owner
// entry. Erasing a key that is not present is a no-op for unordered_map.
void release(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
in_use_iregs_.at(trace->wid).reset(trace->rdest);
break;
case RegType::Float:
in_use_fregs_.at(trace->wid).reset(trace->rdest);
break;
default:
assert(false);
}
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
owners_.erase(tag);
}
void release(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
in_use_iregs_.at(trace->wid).reset(trace->rdest);
break;
case RegType::Float:
in_use_fregs_.at(trace->wid).reset(trace->rdest);
break;
default:
assert(false);
}
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
owners_.erase(tag);
}
private:
// Per-warp pending masks and the tag -> owning trace map (pointers not owned).
std::vector<RegMask> in_use_iregs_;
std::vector<RegMask> in_use_fregs_;
std::unordered_map<uint32_t, instr_trace_t*> owners_;
std::vector<RegMask> in_use_iregs_;
std::vector<RegMask> in_use_fregs_;
std::unordered_map<uint32_t, instr_trace_t*> owners_;
};
}