mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
tabs cleanup
This commit is contained in:
parent
a8f2bb30da
commit
07c063031f
11 changed files with 1112 additions and 1112 deletions
|
@ -19,88 +19,88 @@ namespace vortex {
|
|||
|
||||
// A group of CacheSim instances placed behind three layers of MemSwitch
// arbiters. The constructor builds, in this order:
//   * one "unit" arbiter per unit, bound to that unit's CoreReqPorts/CoreRspPorts;
//   * one "mem" arbiter per cache input, bound to every unit arbiter;
//   * the caches themselves, bound to every mem arbiter;
//   * a single "cache" arbiter bound to every cache's memory-side ports and to
//     this object's MemReqPort/MemRspPort.
class CacheCluster : public SimObject<CacheCluster> {
public:
  std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts; // indexed [unit][request]
  std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts; // indexed [unit][request]
  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  // ctx, name     forwarded to SimObject; name also prefixes every child name.
  // num_units     number of outer entries in CoreReqPorts/CoreRspPorts.
  // num_caches    number of CacheSim instances; 0 creates a single cache with
  //               Config::bypass forced to true.
  // num_requests  number of ports per unit.
  // config        configuration copied into every cache.
  CacheCluster(const SimContext& ctx,
               const char* name,
               uint32_t num_units,
               uint32_t num_caches,
               uint32_t num_requests,
               const CacheSim::Config& config)
    : SimObject(ctx, name)
    , CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
    , CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
    , MemReqPort(this)
    , MemRspPort(this)
    , caches_(MAX(num_caches, 0x1)) {

    // Work on a private copy so the bypass override does not touch the caller's config.
    CacheSim::Config config2(config);
    if (0 == num_caches) {
      num_caches = 1;
      config2.bypass = true;
    }

    // Scratch buffer for child object names; snprintf truncates to fit.
    char sname[100];

    // Per-unit arbiters: num_requests inputs, config.num_inputs outputs.
    std::vector<MemSwitch::Ptr> unit_arbs(num_units);
    for (uint32_t u = 0; u < num_units; ++u) {
      // %u: the index is unsigned (the previous %d mismatched the argument type).
      snprintf(sname, sizeof(sname), "%s-unit-arb-%u", name, u);
      unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
      for (uint32_t i = 0; i < num_requests; ++i) {
        this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
        unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
      }
    }

    // Per-cache-input arbiters: num_units inputs, num_caches outputs.
    std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
    for (uint32_t i = 0; i < config.num_inputs; ++i) {
      snprintf(sname, sizeof(sname), "%s-mem-arb-%u", name, i);
      mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
      for (uint32_t u = 0; u < num_units; ++u) {
        unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
        mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
      }
    }

    // Memory-side arbiter: num_caches inputs, one output.
    snprintf(sname, sizeof(sname), "%s-cache-arb", name);
    auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);

    for (uint32_t i = 0; i < num_caches; ++i) {
      snprintf(sname, sizeof(sname), "%s-cache%u", name, i);
      caches_.at(i) = CacheSim::Create(sname, config2);

      // Core side: one port pair per cache input.
      for (uint32_t j = 0; j < config.num_inputs; ++j) {
        mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
        caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
      }

      // Memory side.
      caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
      cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
    }

    cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
    this->MemRspPort.bind(&cache_arb->RspOut.at(0));
  }

  ~CacheCluster() {}

  // Nothing to do here: the cluster holds no state of its own beyond the wiring.
  void reset() {}

  void tick() {}

  // Returns the sum of perf_stats() over all caches in the cluster.
  CacheSim::PerfStats perf_stats() const {
    CacheSim::PerfStats perf;
    // Bind by reference so the cache handle is not copied on every iteration.
    for (const auto& cache : caches_) {
      perf += cache->perf_stats();
    }
    return perf;
  }

private:
  std::vector<CacheSim::Ptr> caches_;
};
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -20,76 +20,76 @@ namespace vortex {
|
|||
|
||||
// Public interface of the cache simulation object. All behavior is delegated
// to the private Impl class, which is defined outside this declaration.
class CacheSim : public SimObject<CacheSim> {
public:
  // Construction parameters. Field order is preserved because callers may
  // initialize this struct positionally.
  struct Config {
    bool     bypass;          // cache bypass
    uint8_t  C;               // log2 cache size
    uint8_t  L;               // log2 line size
    uint8_t  W;               // log2 word size
    uint8_t  A;               // log2 associativity
    uint8_t  B;               // log2 number of banks
    uint8_t  addr_width;      // word address bits
    uint8_t  ports_per_bank;  // number of ports per bank
    uint8_t  num_inputs;      // number of inputs
    bool     write_through;   // is write-through
    bool     write_reponse;   // enable write response (identifier spelling is part of the interface)
    uint16_t mshr_size;       // MSHR buffer size
    uint8_t  latency;         // pipeline latency
  };

  // Performance counters; every counter starts at zero.
  struct PerfStats {
    uint64_t reads           = 0;
    uint64_t writes          = 0;
    uint64_t read_misses     = 0;
    uint64_t write_misses    = 0;
    uint64_t evictions       = 0;
    uint64_t pipeline_stalls = 0;
    uint64_t bank_stalls     = 0;
    uint64_t mshr_stalls     = 0;
    uint64_t mem_latency     = 0;

    PerfStats() = default;

    // Adds every counter of `rhs` to the matching counter of this object.
    PerfStats& operator+=(const PerfStats& rhs) {
      reads           += rhs.reads;
      writes          += rhs.writes;
      read_misses     += rhs.read_misses;
      write_misses    += rhs.write_misses;
      evictions       += rhs.evictions;
      pipeline_stalls += rhs.pipeline_stalls;
      bank_stalls     += rhs.bank_stalls;
      mshr_stalls     += rhs.mshr_stalls;
      mem_latency     += rhs.mem_latency;
      return *this;
    }
  };

  // Core-facing ports.
  std::vector<SimPort<MemReq>> CoreReqPorts;
  std::vector<SimPort<MemRsp>> CoreRspPorts;

  // Memory-facing ports.
  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  CacheSim(const SimContext& ctx, const char* name, const Config& config);
  ~CacheSim();

  void reset();

  void tick();

  const PerfStats& perf_stats() const;

private:
  class Impl;
  Impl* impl_;
};
|
||||
|
||||
}
|
|
@ -21,25 +21,25 @@ namespace vortex {
|
|||
|
||||
class BaseDCRS {
|
||||
public:
|
||||
uint32_t read(uint32_t addr) const {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
return states_.at(state);
|
||||
}
|
||||
uint32_t read(uint32_t addr) const {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
return states_.at(state);
|
||||
}
|
||||
|
||||
void write(uint32_t addr, uint32_t value) {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
states_.at(state) = value;
|
||||
}
|
||||
void write(uint32_t addr, uint32_t value) {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
states_.at(state) = value;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
||||
private:
|
||||
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
||||
};
|
||||
|
||||
class DCRS {
|
||||
public:
|
||||
void write(uint32_t addr, uint32_t value);
|
||||
|
||||
BaseDCRS base_dcrs;
|
||||
void write(uint32_t addr, uint32_t value);
|
||||
|
||||
BaseDCRS base_dcrs;
|
||||
};
|
||||
|
||||
}
|
|
@ -21,120 +21,120 @@ namespace vortex {
|
|||
|
||||
// Buffers instruction traces per issue slot and forwards them to Outputs.
// Each tick() serves one batch of block_size_ consecutive slots. When
// pid_count_ != 1, a trace whose active threads span several groups of
// num_lanes_ threads is emitted as several traces, one group per tick.
class Dispatcher : public SimObject<Dispatcher> {
public:
  std::vector<SimPort<instr_trace_t*>> Outputs;

  // buf_size   capacity of each per-slot queue (enforced by push()).
  // block_size number of slots served per tick.
  //            NOTE(review): used as a divisor below; 0 would divide by zero.
  // num_lanes  threads per emitted trace.
  //            NOTE(review): also used as a divisor.
  Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
    : SimObject<Dispatcher>(ctx, "Dispatcher")
    , Outputs(ISSUE_WIDTH, this)
    , Inputs_(ISSUE_WIDTH, this)
    , arch_(arch)
    , queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
    , buf_size_(buf_size)
    , block_size_(block_size)
    , num_lanes_(num_lanes)
    , batch_count_(ISSUE_WIDTH / block_size)
    , pid_count_(arch.num_threads() / num_lanes)
    , batch_idx_(0)
    , start_p_(block_size, 0)
  {}

  virtual ~Dispatcher() {}

  // Returns to the first batch and clears all split progress.
  virtual void reset() {
    batch_idx_ = 0;
    rewind_blocks();
  }

  virtual void tick() {
    // Stage 1: move at most one queued trace per issue slot into its input port.
    for (uint32_t slot = 0; slot < ISSUE_WIDTH; ++slot) {
      auto& pending = queues_.at(slot);
      if (!pending.empty()) {
        Inputs_.at(slot).push(pending.front(), 1);
        pending.pop();
      }
    }

    // Stage 2: serve every slot of the current batch.
    uint32_t blocks_done = 0;
    for (uint32_t b = 0; b < block_size_; ++b) {
      const uint32_t slot = batch_idx_ * block_size_ + b;
      auto& input = Inputs_.at(slot);
      if (input.empty()) {
        ++blocks_done;
        continue;
      }
      auto& output = Outputs.at(slot);
      auto trace = input.front();
      auto out_trace = trace;
      if (pid_count_ == 1) {
        // No splitting: forward the trace itself.
        out_trace->pid = 0;
        input.pop();
        ++blocks_done;
      } else {
        const auto resume = start_p_.at(b);
        if (resume == -1) {
          // This slot already finished its trace during the current batch.
          ++blocks_done;
          continue;
        }
        // Find the first and last active thread at or after the resume point.
        // NOTE(review): if no thread is active in that range, both stay -1 and
        // the mixed signed/unsigned divisions below yield a large index.
        int first(-1), last(-1);
        for (uint32_t t = resume * num_lanes_, n = arch_.num_threads(); t < n; ++t) {
          if (trace->tmask.test(t)) {
            if (first == -1)
              first = t;
            last = t;
          }
        }
        // Convert thread indices to lane-group indices.
        first /= num_lanes_;
        last /= num_lanes_;
        if (first == last) {
          // Last (or only) group: forward the original trace and retire it.
          start_p_.at(b) = -1;
          input.pop();
          ++blocks_done;
        } else {
          // More groups follow: emit a copy and keep the original queued.
          out_trace = new instr_trace_t(*trace);
          out_trace->eop = false;
          start_p_.at(b) = first + 1;
        }
        out_trace->pid = first;
        out_trace->sop = (0 == resume);
        // Keep only the thread bits that belong to the emitted group.
        ThreadMask lane_mask;
        for (int t = first * num_lanes_, n = t + num_lanes_; t < n; ++t) {
          lane_mask[t] = trace->tmask[t];
        }
        out_trace->tmask = lane_mask;
      }
      DT(3, "pipeline-dispatch: " << *out_trace);
      output.push(out_trace, 1);
    }

    // Advance to the next batch once every slot of this one is done.
    if (blocks_done == block_size_) {
      batch_idx_ = (batch_idx_ + 1) % batch_count_;
      rewind_blocks();
    }
  }

  // Queues `trace` for `issue_index`; returns false (and does not queue it)
  // when that slot's queue already holds buf_size_ entries.
  bool push(uint32_t issue_index, instr_trace_t* trace) {
    auto& pending = queues_.at(issue_index);
    if (pending.size() < buf_size_) {
      pending.push(trace);
      return true;
    }
    return false;
  }

private:
  // Resets the split progress of every slot in a batch to 0.
  void rewind_blocks() {
    for (auto& p : start_p_) {
      p = 0;
    }
  }

  std::vector<SimPort<instr_trace_t*>> Inputs_;
  const Arch& arch_;
  std::vector<std::queue<instr_trace_t*>> queues_;
  uint32_t buf_size_;
  uint32_t block_size_;
  uint32_t num_lanes_;
  uint32_t batch_count_;   // ISSUE_WIDTH / block_size
  uint32_t pid_count_;     // arch.num_threads() / num_lanes
  uint32_t batch_idx_;     // batch served by the next tick()
  std::vector<int> start_p_; // per slot: next lane group to emit, or -1 when done
};
|
||||
|
||||
}
|
||||
|
|
|
@ -19,39 +19,39 @@
|
|||
namespace vortex {
|
||||
|
||||
class IBuffer {
|
||||
public:
|
||||
IBuffer(uint32_t size)
|
||||
: capacity_(size)
|
||||
{}
|
||||
public:
|
||||
IBuffer(uint32_t size)
|
||||
: capacity_(size)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return entries_.empty();
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (entries_.size() == capacity_);
|
||||
}
|
||||
bool empty() const {
|
||||
return entries_.empty();
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (entries_.size() == capacity_);
|
||||
}
|
||||
|
||||
instr_trace_t* top() const {
|
||||
return entries_.front();
|
||||
}
|
||||
instr_trace_t* top() const {
|
||||
return entries_.front();
|
||||
}
|
||||
|
||||
void push(instr_trace_t* trace) {
|
||||
entries_.emplace(trace);
|
||||
}
|
||||
void push(instr_trace_t* trace) {
|
||||
entries_.emplace(trace);
|
||||
}
|
||||
|
||||
void pop() {
|
||||
return entries_.pop();
|
||||
}
|
||||
void pop() {
|
||||
return entries_.pop();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
std::queue<instr_trace_t*> empty;
|
||||
std::swap(entries_, empty );
|
||||
}
|
||||
void clear() {
|
||||
std::queue<instr_trace_t*> empty;
|
||||
std::swap(entries_, empty );
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<instr_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
std::queue<instr_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
};
|
||||
|
||||
}
|
|
@ -21,118 +21,118 @@ using namespace vortex;
|
|||
|
||||
// Implementation of LocalMem: a RAM plus a per-tick bank-conflict model.
class LocalMem::Impl {
protected:
  LocalMem* simobject_;
  Config    config_;
  RAM       ram_;
  uint32_t  bank_sel_addr_start_; // lowest address bit used to select a bank
  uint32_t  bank_sel_addr_end_;   // highest address bit used to select a bank
  PerfStats perf_stats_;

  // Keeps the low log2ceil(capacity / line_size) bits of `addr`.
  // NOTE(review): the mask width is derived from the line count, not from the
  // byte capacity — confirm this is the intended local address mapping.
  uint64_t to_local_addr(uint64_t addr) {
    uint32_t total_lines = config_.capacity / config_.line_size;
    uint32_t line_bits = log2ceil(total_lines);
    uint32_t offset = bit_getw(addr, 0, line_bits-1);
    return offset;
  }

public:
  Impl(LocalMem* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
    , ram_(config.capacity)
    , bank_sel_addr_start_(0)
    // Unsigned arithmetic: this wraps to UINT32_MAX when log2ceil() returns 0
    // (presumably the single-bank case); tick() guards against that.
    , bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
  {}

  virtual ~Impl() {}

  void reset() {
    perf_stats_ = PerfStats();
  }

  // Reads `size` bytes at the local address derived from `addr` into `data`.
  void read(void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.read(data, s_addr, size);
  }

  // Writes `size` bytes from `data` at the local address derived from `addr`.
  void write(const void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.write(data, s_addr, size);
  }

  // Serves at most one request per bank this cycle. A request that targets a
  // bank already used in this tick stays in its input port and is counted as a
  // bank stall.
  void tick() {
    std::vector<bool> in_used_banks(config_.num_banks);
    for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
      auto& core_req_port = simobject_->Inputs.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      // Bank selection. With a single bank there are no select bits and
      // bank_sel_addr_end_ has wrapped, so `start <= end` alone would still
      // pass; the explicit bank-count check keeps bank 0 in that case.
      uint32_t bank_id = 0;
      if (config_.num_banks > 1 && bank_sel_addr_start_ <= bank_sel_addr_end_) {
        bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
      }

      // bank conflict check
      if (in_used_banks.at(bank_id)) {
        ++perf_stats_.bank_stalls;
        continue;
      }

      in_used_banks.at(bank_id) = true;

      // Reads always get a response; writes only when write_reponse is set.
      if (!core_req.write || config_.write_reponse) {
        // send response
        MemRsp core_rsp{core_req.tag, core_req.cid};
        simobject_->Outputs.at(req_id).push(core_rsp, 1);
      }

      // update perf counters
      perf_stats_.reads += !core_req.write;
      perf_stats_.writes += core_req.write;

      // remove input (core_req is a reference into the port; pop last)
      core_req_port.pop();
    }
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates config.num_reqs input and output ports and the private implementation.
LocalMem::LocalMem(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<LocalMem>(ctx, name),
    Inputs(config.num_reqs, this),
    Outputs(config.num_reqs, this),
    impl_(new Impl(this, config)) // released in ~LocalMem()
{
}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
LocalMem::~LocalMem()
{
  delete impl_;
}
|
||||
|
||||
// Forwards to Impl::reset().
void LocalMem::reset()
{
  impl_->reset();
}
|
||||
|
||||
// Forwards to Impl::read(): copies `size` bytes for `addr` into `data`.
void LocalMem::read(void* data, uint64_t addr, uint32_t size)
{
  impl_->read(data, addr, size);
}
|
||||
|
||||
// Forwards to Impl::write(): stores `size` bytes from `data` for `addr`.
void LocalMem::write(const void* data, uint64_t addr, uint32_t size)
{
  impl_->write(data, addr, size);
}
|
||||
|
||||
// Forwards to Impl::tick().
void LocalMem::tick()
{
  impl_->tick();
}
|
||||
|
||||
// Returns a reference to the counters held by the implementation object.
const LocalMem::PerfStats& LocalMem::perf_stats() const
{
  return impl_->perf_stats();
}
|
|
@ -32,105 +32,105 @@ using namespace vortex;
|
|||
|
||||
// Implementation of MemSim: feeds requests from MemReqPort into a ramulator
// DRAM model and pushes read completions to MemRspPort.
class MemSim::Impl {
private:
  MemSim* simobject_;
  Config config_;
  PerfStats perf_stats_;
  ramulator::Gem5Wrapper* dram_; // allocated in the constructor, deleted in ~Impl()

public:

  // Builds the ramulator configuration from `config` and creates the DRAM model.
  Impl(MemSim* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
  {
    ramulator::Config dram_cfg;
    dram_cfg.add("standard", "DDR4");
    dram_cfg.add("channels", std::to_string(config.channels));
    dram_cfg.add("ranks", "1");
    dram_cfg.add("speed", "DDR4_2400R");
    dram_cfg.add("org", "DDR4_4Gb_x8");
    dram_cfg.add("mapping", "defaultmapping");
    dram_cfg.set_core_num(config.num_cores);
    dram_ = new ramulator::Gem5Wrapper(dram_cfg, MEM_BLOCK_SIZE);
    Stats::statlist.output("ramulator.ddr4.log");
  }

  // Finishes the DRAM model, prints the ramulator statistics and frees the model.
  ~Impl() {
    dram_->finish();
    Stats::statlist.printall();
    delete dram_;
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }

  // Completion callback handed to ramulator. Write completions produce no
  // response; any other completion pushes a MemRsp to MemRspPort.
  void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
    if (req.type != ramulator::Request::Type::WRITE) {
      MemRsp mem_rsp{tag, static_cast<uint32_t>(req.coreid), uuid};
      simobject_->MemRspPort.push(mem_rsp, 1);
      DT(3, simobject_->name() << "-" << mem_rsp);
    }
  }

  void reset() {
    perf_stats_ = PerfStats();
  }

  // Advances the DRAM model, then tries to submit the request at the head of
  // MemReqPort. The request stays queued if the DRAM model refuses it.
  void tick() {
    // Positive ratio: tick the DRAM on every MEM_CYCLE_RATIO-th simulator cycle.
    // Otherwise: tick it (1 - MEM_CYCLE_RATIO) times per simulator cycle.
    if (MEM_CYCLE_RATIO > 0) {
      auto cycle = SimPlatform::instance().cycles();
      if ((cycle % MEM_CYCLE_RATIO) == 0)
        dram_->tick();
    } else {
      for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
        dram_->tick();
    }

    auto& req_port = simobject_->MemReqPort;
    if (req_port.empty())
      return;

    auto& mem_req = req_port.front();

    // Copy tag and uuid now; the callback runs after this request is popped.
    const auto tag = mem_req.tag;
    const auto uuid = mem_req.uuid;
    const auto req_type = mem_req.write ? ramulator::Request::Type::WRITE
                                        : ramulator::Request::Type::READ;

    ramulator::Request dram_req(
      mem_req.addr,
      req_type,
      [this, tag, uuid](ramulator::Request& req) { this->dram_callback(req, tag, uuid); },
      mem_req.cid
    );

    if (!dram_->send(dram_req))
      return;

    ++(mem_req.write ? perf_stats_.writes : perf_stats_.reads);

    DT(3, simobject_->name() << "-" << mem_req);

    req_port.pop();
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates the request/response ports and the private implementation.
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<MemSim>(ctx, name),
    MemReqPort(this),
    MemRspPort(this),
    impl_(new Impl(this, config)) // released in ~MemSim()
{
}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
MemSim::~MemSim()
{
  delete impl_;
}
|
||||
|
||||
// Forwards to Impl::reset().
void MemSim::reset()
{
  impl_->reset();
}
|
||||
|
||||
// Forwards to Impl::tick().
void MemSim::tick()
{
  impl_->tick();
}
|
|
@ -20,36 +20,36 @@ namespace vortex {
|
|||
|
||||
// Public interface of the memory simulation object. All behavior is delegated
// to the private Impl class, which is defined outside this declaration.
class MemSim : public SimObject<MemSim> {
public:
  // Construction parameters.
  struct Config {
    uint32_t channels;
    uint32_t num_cores;
  };

  // Request counters; both start at zero.
  struct PerfStats {
    uint64_t reads  = 0;
    uint64_t writes = 0;

    PerfStats() = default;
  };

  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  MemSim(const SimContext& ctx, const char* name, const Config& config);
  ~MemSim();

  void reset();

  void tick();

  const PerfStats& perf_stats() const;

private:
  class Impl;
  Impl* impl_;
};
|
||||
|
||||
};
|
|
@ -23,37 +23,37 @@ public:
|
|||
SimPort<instr_trace_t*> Output;
|
||||
|
||||
Operand(const SimContext& ctx)
|
||||
: SimObject<Operand>(ctx, "Operand")
|
||||
, Input(this)
|
||||
, Output(this)
|
||||
: SimObject<Operand>(ctx, "Operand")
|
||||
, Input(this)
|
||||
, Output(this)
|
||||
{}
|
||||
|
||||
|
||||
virtual ~Operand() {}
|
||||
|
||||
virtual void reset() {}
|
||||
|
||||
virtual void tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
|
||||
int delay = 1;
|
||||
for (int i = 0; i < MAX_NUM_REGS; ++i) {
|
||||
bool is_iregs = trace->used_iregs.test(i);
|
||||
bool is_fregs = trace->used_fregs.test(i);
|
||||
bool is_vregs = trace->used_vregs.test(i);
|
||||
if (is_iregs || is_fregs || is_vregs) {
|
||||
if (is_iregs && i == 0)
|
||||
continue;
|
||||
++delay;
|
||||
}
|
||||
}
|
||||
int delay = 1;
|
||||
for (int i = 0; i < MAX_NUM_REGS; ++i) {
|
||||
bool is_iregs = trace->used_iregs.test(i);
|
||||
bool is_fregs = trace->used_fregs.test(i);
|
||||
bool is_vregs = trace->used_vregs.test(i);
|
||||
if (is_iregs || is_fregs || is_vregs) {
|
||||
if (is_iregs && i == 0)
|
||||
continue;
|
||||
++delay;
|
||||
}
|
||||
}
|
||||
|
||||
Output.push(trace, delay);
|
||||
|
||||
DT(3, "pipeline-operands: " << *trace);
|
||||
Output.push(trace, delay);
|
||||
|
||||
DT(3, "pipeline-operands: " << *trace);
|
||||
|
||||
Input.pop();
|
||||
Input.pop();
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -22,98 +22,98 @@ namespace vortex {
|
|||
class Scoreboard {
|
||||
public:
|
||||
|
||||
struct reg_use_t {
|
||||
RegType reg_type;
|
||||
uint32_t reg_id;
|
||||
FUType fu_type;
|
||||
SfuType sfu_type;
|
||||
uint64_t uuid;
|
||||
};
|
||||
|
||||
Scoreboard(const Arch &arch)
|
||||
: in_use_iregs_(arch.num_warps())
|
||||
, in_use_fregs_(arch.num_warps())
|
||||
{
|
||||
this->clear();
|
||||
}
|
||||
struct reg_use_t {
|
||||
RegType reg_type;
|
||||
uint32_t reg_id;
|
||||
FUType fu_type;
|
||||
SfuType sfu_type;
|
||||
uint64_t uuid;
|
||||
};
|
||||
|
||||
Scoreboard(const Arch &arch)
|
||||
: in_use_iregs_(arch.num_warps())
|
||||
, in_use_fregs_(arch.num_warps())
|
||||
{
|
||||
this->clear();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
||||
in_use_iregs_.at(i).reset();
|
||||
in_use_fregs_.at(i).reset();
|
||||
}
|
||||
owners_.clear();
|
||||
}
|
||||
void clear() {
|
||||
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
||||
in_use_iregs_.at(i).reset();
|
||||
in_use_fregs_.at(i).reset();
|
||||
}
|
||||
owners_.clear();
|
||||
}
|
||||
|
||||
bool in_use(instr_trace_t* trace) const {
|
||||
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|
||||
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
|
||||
}
|
||||
bool in_use(instr_trace_t* trace) const {
|
||||
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|
||||
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
|
||||
}
|
||||
|
||||
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
|
||||
std::vector<reg_use_t> out;
|
||||
|
||||
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
|
||||
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
|
||||
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
|
||||
std::vector<reg_use_t> out;
|
||||
|
||||
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
|
||||
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
|
||||
|
||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||
if (used_iregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||
if (used_iregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||
if (used_fregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||
if (used_fregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void reserve(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(trace->wid).set(trace->rdest);
|
||||
break;
|
||||
case RegType::Float:
|
||||
in_use_fregs_.at(trace->wid).set(trace->rdest);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||
assert(owners_.count(tag) == 0);
|
||||
owners_[tag] = trace;
|
||||
assert((int)trace->fu_type < 5);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
void reserve(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(trace->wid).set(trace->rdest);
|
||||
break;
|
||||
case RegType::Float:
|
||||
in_use_fregs_.at(trace->wid).set(trace->rdest);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||
assert(owners_.count(tag) == 0);
|
||||
owners_[tag] = trace;
|
||||
assert((int)trace->fu_type < 5);
|
||||
}
|
||||
|
||||
void release(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(trace->wid).reset(trace->rdest);
|
||||
break;
|
||||
case RegType::Float:
|
||||
in_use_fregs_.at(trace->wid).reset(trace->rdest);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||
owners_.erase(tag);
|
||||
}
|
||||
void release(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(trace->wid).reset(trace->rdest);
|
||||
break;
|
||||
case RegType::Float:
|
||||
in_use_fregs_.at(trace->wid).reset(trace->rdest);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||
owners_.erase(tag);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::unordered_map<uint32_t, instr_trace_t*> owners_;
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::unordered_map<uint32_t, instr_trace_t*> owners_;
|
||||
};
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue