mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-06-28 09:37:38 -04:00
tabs cleanup
This commit is contained in:
parent
a8f2bb30da
commit
07c063031f
11 changed files with 1112 additions and 1112 deletions
|
@ -19,88 +19,88 @@ namespace vortex {
|
||||||
|
|
||||||
// Groups several CacheSim instances behind a single set of ports.
//
// Wiring built by the constructor (all connections are made with bind()):
//   CoreReqPorts[u][r] -> unit_arbs[u]   (one switch per unit)
//   unit_arbs[u]       -> mem_arbs[i]    (one switch per cache input)
//   mem_arbs[i]        -> caches_[c]     (CoreReqPorts[i] of every cache)
//   caches_[c]         -> cache_arb      (single switch with one output)
//   cache_arb          -> MemReqPort
// Response ports are bound along the same path in the opposite direction.
class CacheCluster : public SimObject<CacheCluster> {
public:
  // Core-facing ports, indexed as [unit][request].
  std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
  std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;

  // Memory-facing ports shared by all caches of the cluster.
  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  // ctx          simulation context forwarded to SimObject
  // name         base name; also used as prefix for every child object
  // num_units    number of units attached to the cluster
  // num_caches   number of cache instances; 0 creates one cache with
  //              `bypass` forced to true
  // num_requests number of request ports exposed to each unit
  // config       cache configuration applied to every instance
  CacheCluster(const SimContext& ctx,
               const char* name,
               uint32_t num_units,
               uint32_t num_caches,
               uint32_t num_requests,
               const CacheSim::Config& config)
    : SimObject(ctx, name)
    , CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
    , CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
    , MemReqPort(this)
    , MemRspPort(this)
    , caches_(MAX(num_caches, 0x1)) {

    // Work on a private copy so the bypass flag can be forced without
    // touching the caller's configuration.
    CacheSim::Config config2(config);
    if (0 == num_caches) {
      num_caches = 1;
      config2.bypass = true;
    }

    // Scratch buffer for child object names; snprintf truncates if needed.
    char sname[100];

    // One arbiter per unit: num_requests inputs, config.num_inputs outputs.
    std::vector<MemSwitch::Ptr> unit_arbs(num_units);
    for (uint32_t u = 0; u < num_units; ++u) {
      // %u: the index is unsigned (the previous %d mismatched uint32_t).
      snprintf(sname, sizeof(sname), "%s-unit-arb-%u", name, u);
      unit_arbs.at(u) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
      for (uint32_t i = 0; i < num_requests; ++i) {
        this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
        unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
      }
    }

    // One arbiter per cache input: num_units inputs, num_caches outputs.
    std::vector<MemSwitch::Ptr> mem_arbs(config.num_inputs);
    for (uint32_t i = 0; i < config.num_inputs; ++i) {
      snprintf(sname, sizeof(sname), "%s-mem-arb-%u", name, i);
      mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
      for (uint32_t u = 0; u < num_units; ++u) {
        unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
        mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
      }
    }

    // Single arbiter merging the memory side of every cache.
    snprintf(sname, sizeof(sname), "%s-cache-arb", name);
    auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);

    for (uint32_t i = 0; i < num_caches; ++i) {
      snprintf(sname, sizeof(sname), "%s-cache%u", name, i);
      caches_.at(i) = CacheSim::Create(sname, config2);

      for (uint32_t j = 0; j < config.num_inputs; ++j) {
        mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
        caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
      }

      caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
      cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
    }

    cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
    this->MemRspPort.bind(&cache_arb->RspOut.at(0));
  }

  ~CacheCluster() {}

  // The cluster keeps no per-cycle state of its own.
  void reset() {}

  void tick() {}

  // Returns the sum of the counters of every cache in the cluster.
  CacheSim::PerfStats perf_stats() const {
    CacheSim::PerfStats perf;
    // Iterate by reference so the cache handles are not copied.
    for (const auto& cache : caches_) {
      perf += cache->perf_stats();
    }
    return perf;
  }

private:
  std::vector<CacheSim::Ptr> caches_;
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -20,76 +20,76 @@ namespace vortex {
|
||||||
|
|
||||||
// Public interface of the cache model; the behaviour lives in the private
// Impl class that impl_ points to.
class CacheSim : public SimObject<CacheSim> {
public:
  // Construction-time parameters. Field names and order are part of the
  // interface (including the historical spelling of `write_reponse`).
  struct Config {
    bool     bypass;         // cache bypass
    uint8_t  C;              // log2 cache size
    uint8_t  L;              // log2 line size
    uint8_t  W;              // log2 word size
    uint8_t  A;              // log2 associativity
    uint8_t  B;              // log2 number of banks
    uint8_t  addr_width;     // word address bits
    uint8_t  ports_per_bank; // number of ports per bank
    uint8_t  num_inputs;     // number of inputs
    bool     write_through;  // is write-through
    bool     write_reponse;  // enable write response
    uint16_t mshr_size;      // MSHR buffer size
    uint8_t  latency;        // pipeline latency
  };

  // Event counters; a freshly constructed object has every counter at zero.
  struct PerfStats {
    uint64_t reads = 0;
    uint64_t writes = 0;
    uint64_t read_misses = 0;
    uint64_t write_misses = 0;
    uint64_t evictions = 0;
    uint64_t pipeline_stalls = 0;
    uint64_t bank_stalls = 0;
    uint64_t mshr_stalls = 0;
    uint64_t mem_latency = 0;

    PerfStats() {}

    // Field-wise accumulation of another set of counters.
    PerfStats& operator+=(const PerfStats& rhs) {
      reads           += rhs.reads;
      writes          += rhs.writes;
      read_misses     += rhs.read_misses;
      write_misses    += rhs.write_misses;
      evictions       += rhs.evictions;
      pipeline_stalls += rhs.pipeline_stalls;
      bank_stalls     += rhs.bank_stalls;
      mshr_stalls     += rhs.mshr_stalls;
      mem_latency     += rhs.mem_latency;
      return *this;
    }
  };

  // Core-facing request/response ports.
  std::vector<SimPort<MemReq>> CoreReqPorts;
  std::vector<SimPort<MemRsp>> CoreRspPorts;

  // Memory-facing request/response ports.
  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  CacheSim(const SimContext& ctx, const char* name, const Config& config);
  ~CacheSim();

  void reset();

  void tick();

  const PerfStats& perf_stats() const;

private:
  class Impl;
  Impl* impl_;
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -21,25 +21,25 @@ namespace vortex {
|
||||||
|
|
||||||
class BaseDCRS {
|
class BaseDCRS {
|
||||||
public:
|
public:
|
||||||
uint32_t read(uint32_t addr) const {
|
uint32_t read(uint32_t addr) const {
|
||||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||||
return states_.at(state);
|
return states_.at(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write(uint32_t addr, uint32_t value) {
|
void write(uint32_t addr, uint32_t value) {
|
||||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||||
states_.at(state) = value;
|
states_.at(state) = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class DCRS {
|
class DCRS {
|
||||||
public:
|
public:
|
||||||
void write(uint32_t addr, uint32_t value);
|
void write(uint32_t addr, uint32_t value);
|
||||||
|
|
||||||
BaseDCRS base_dcrs;
|
BaseDCRS base_dcrs;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -21,120 +21,120 @@ namespace vortex {
|
||||||
|
|
||||||
// Moves instruction traces from per-issue-slot queues to the Outputs ports.
//
// Issue slots are handled in batches of block_size_ consecutive slots; the
// dispatcher only advances to the next batch once every slot of the current
// batch has finished. When pid_count_ != 1 a trace is emitted as several
// packets, each carrying the thread-mask bits of one group of num_lanes_
// threads.
class Dispatcher : public SimObject<Dispatcher> {
public:
  // One output port per issue slot.
  std::vector<SimPort<instr_trace_t*>> Outputs;

  // buf_size   maximum number of traces held per queue (see push())
  // block_size number of issue slots served per batch
  // num_lanes  number of threads covered by one emitted packet
  Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
    : SimObject<Dispatcher>(ctx, "Dispatcher")
    , Outputs(ISSUE_WIDTH, this)
    , Inputs_(ISSUE_WIDTH, this)
    , arch_(arch)
    , queues_(ISSUE_WIDTH)
    , buf_size_(buf_size)
    , block_size_(block_size)
    , num_lanes_(num_lanes)
    , batch_count_(ISSUE_WIDTH / block_size)
    , pid_count_(arch.num_threads() / num_lanes)
    , batch_idx_(0)
    , start_p_(block_size, 0)
  {}

  virtual ~Dispatcher() {}

  // Restarts from batch 0 with every packet cursor rewound.
  // Queued traces are left untouched.
  virtual void reset() {
    batch_idx_ = 0;
    for (auto& cursor : start_p_) {
      cursor = 0;
    }
  }

  virtual void tick() {
    // Step 1: hand at most one queued trace per issue slot to its input port.
    for (uint32_t slot = 0; slot < ISSUE_WIDTH; ++slot) {
      auto& pending = queues_.at(slot);
      if (!pending.empty()) {
        Inputs_.at(slot).push(pending.front(), 1);
        pending.pop();
      }
    }

    // Step 2: serve the slots of the current batch.
    uint32_t finished = 0; // slots of this batch with nothing left to send
    for (uint32_t b = 0; b < block_size_; ++b) {
      const uint32_t slot = batch_idx_ * block_size_ + b;
      auto& in_port = Inputs_.at(slot);
      if (in_port.empty()) {
        ++finished;
        continue;
      }
      auto& out_port = Outputs.at(slot);
      auto trace = in_port.front();
      auto packet = trace;
      if (pid_count_ == 1) {
        // A single packet covers all threads: forward the trace as is.
        packet->pid = 0;
        in_port.pop();
        ++finished;
      } else {
        auto cursor = start_p_.at(b);
        if (cursor == -1) {
          // This slot already sent its last packet; wait for the others.
          ++finished;
          continue;
        }
        // Find the first and last active thread at or after the cursor.
        // NOTE(review): if no thread is active both stay at -1 and the
        // mixed signed/unsigned divisions below produce a large index -
        // presumably traces always carry a non-empty mask; confirm.
        int first(-1), last(-1);
        for (uint32_t j = cursor * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
          if (!trace->tmask.test(j))
            continue;
          if (first == -1)
            first = j;
          last = j;
        }
        // Convert thread indices to packet indices.
        first /= num_lanes_;
        last /= num_lanes_;
        if (first == last) {
          // Remaining active threads fit in one packet: this is the last.
          start_p_.at(b) = -1;
          in_port.pop();
          ++finished;
        } else {
          // More packets follow: emit a copy and keep the trace queued.
          packet = new instr_trace_t(*trace);
          packet->eop = false;
          start_p_.at(b) = first + 1;
        }
        packet->pid = first;
        packet->sop = (0 == cursor);
        // Keep only the mask bits of the threads covered by this packet.
        ThreadMask lanes;
        for (int j = first * num_lanes_, n = j + num_lanes_; j < n; ++j) {
          lanes[j] = trace->tmask[j];
        }
        packet->tmask = lanes;
      }
      DT(3, "pipeline-dispatch: " << *packet);
      out_port.push(packet, 1);
    }

    // Step 3: move to the next batch once the whole block is done.
    if (finished == block_size_) {
      batch_idx_ = (batch_idx_ + 1) % batch_count_;
      for (auto& cursor : start_p_) {
        cursor = 0;
      }
    }
  }

  // Queues `trace` for issue slot `issue_index`.
  // Returns false, without queuing, when that queue already holds
  // buf_size_ entries.
  bool push(uint32_t issue_index, instr_trace_t* trace) {
    auto& pending = queues_.at(issue_index);
    const bool has_room = (pending.size() < buf_size_);
    if (has_room) {
      pending.push(trace);
    }
    return has_room;
  }

private:
  std::vector<SimPort<instr_trace_t*>> Inputs_;     // one per issue slot
  const Arch& arch_;
  std::vector<std::queue<instr_trace_t*>> queues_;  // staging, filled by push()
  uint32_t buf_size_;
  uint32_t block_size_;
  uint32_t num_lanes_;
  uint32_t batch_count_;  // ISSUE_WIDTH / block_size
  uint32_t pid_count_;    // num_threads / num_lanes
  uint32_t batch_idx_;    // batch currently being served
  std::vector<int> start_p_; // per-slot packet cursor; -1 means finished
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,39 +19,39 @@
|
||||||
namespace vortex {
|
namespace vortex {
|
||||||
|
|
||||||
class IBuffer {
|
class IBuffer {
|
||||||
public:
|
public:
|
||||||
IBuffer(uint32_t size)
|
IBuffer(uint32_t size)
|
||||||
: capacity_(size)
|
: capacity_(size)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
bool empty() const {
|
bool empty() const {
|
||||||
return entries_.empty();
|
return entries_.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool full() const {
|
bool full() const {
|
||||||
return (entries_.size() == capacity_);
|
return (entries_.size() == capacity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
instr_trace_t* top() const {
|
instr_trace_t* top() const {
|
||||||
return entries_.front();
|
return entries_.front();
|
||||||
}
|
}
|
||||||
|
|
||||||
void push(instr_trace_t* trace) {
|
void push(instr_trace_t* trace) {
|
||||||
entries_.emplace(trace);
|
entries_.emplace(trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pop() {
|
void pop() {
|
||||||
return entries_.pop();
|
return entries_.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
std::queue<instr_trace_t*> empty;
|
std::queue<instr_trace_t*> empty;
|
||||||
std::swap(entries_, empty );
|
std::swap(entries_, empty );
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::queue<instr_trace_t*> entries_;
|
std::queue<instr_trace_t*> entries_;
|
||||||
uint32_t capacity_;
|
uint32_t capacity_;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -21,118 +21,118 @@ using namespace vortex;
|
||||||
|
|
||||||
// Private implementation of LocalMem: a RAM of config.capacity bytes plus a
// per-tick bank-conflict model over the owner's Inputs/Outputs ports.
class LocalMem::Impl {
protected:
  LocalMem* simobject_;           // owning object; provides the ports
  Config    config_;
  RAM       ram_;
  uint32_t  bank_sel_addr_start_; // lowest address bit of the bank selector
  uint32_t  bank_sel_addr_end_;   // highest address bit of the bank selector
  PerfStats perf_stats_;

  // Maps `addr` to an offset inside ram_ by keeping its low
  // log2ceil(capacity / line_size) bits.
  uint64_t to_local_addr(uint64_t addr) {
    uint32_t total_lines = config_.capacity / config_.line_size;
    uint32_t line_bits = log2ceil(total_lines);
    uint32_t offset = bit_getw(addr, 0, line_bits-1);
    return offset;
  }

public:
  Impl(LocalMem* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
    , ram_(config.capacity)
    , bank_sel_addr_start_(0)
    // NOTE: unsigned; wraps around when log2ceil(num_banks) is 0
    // (presumably the single-bank case). tick() guards against that.
    , bank_sel_addr_end_(0 + log2ceil(config.num_banks)-1)
  {}

  virtual ~Impl() {}

  // Clears the performance counters; memory contents are not touched here.
  void reset() {
    perf_stats_ = PerfStats();
  }

  // Copies `size` bytes from local memory at `addr` into `data`.
  void read(void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.read(data, s_addr, size);
  }

  // Copies `size` bytes from `data` into local memory at `addr`.
  void write(const void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Local Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.write(data, s_addr, size);
  }

  // Serves the request at the head of each input port, allowing one request
  // per bank per tick. A request hitting an already used bank stays in its
  // port and is counted as a bank stall.
  void tick() {
    std::vector<bool> in_used_banks(config_.num_banks);
    for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
      auto& core_req_port = simobject_->Inputs.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      uint32_t bank_id = 0;
      // With a single bank there are no selector bits and the only valid
      // bank is 0. The start <= end test alone cannot detect that case
      // because bank_sel_addr_end_ is unsigned and wraps, so the bank count
      // is checked explicitly.
      if (config_.num_banks > 1 && bank_sel_addr_start_ <= bank_sel_addr_end_) {
        bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
      }

      // bank conflict check
      if (in_used_banks.at(bank_id)) {
        ++perf_stats_.bank_stalls;
        continue;
      }

      in_used_banks.at(bank_id) = true;

      if (!core_req.write || config_.write_reponse) {
        // send response (reads always; writes only if write responses are on)
        MemRsp core_rsp{core_req.tag, core_req.cid};
        simobject_->Outputs.at(req_id).push(core_rsp, 1);
      }

      // update perf counters
      perf_stats_.reads += !core_req.write;
      perf_stats_.writes += core_req.write;

      // remove input
      core_req_port.pop();
    }
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Creates config.num_reqs input and output ports and the private
// implementation object, which receives a pointer back to this instance.
LocalMem::LocalMem(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<LocalMem>(ctx, name)
  , Inputs(config.num_reqs, this)
  , Outputs(config.num_reqs, this)
  , impl_(new Impl(this, config))
{}
|
||||||
|
|
||||||
// Releases the implementation object allocated by the constructor.
LocalMem::~LocalMem() {
  delete this->impl_;
}
|
||||||
|
|
||||||
void LocalMem::reset() {
|
void LocalMem::reset() {
|
||||||
impl_->reset();
|
impl_->reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LocalMem::read(void* data, uint64_t addr, uint32_t size) {
|
void LocalMem::read(void* data, uint64_t addr, uint32_t size) {
|
||||||
impl_->read(data, addr, size);
|
impl_->read(data, addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LocalMem::write(const void* data, uint64_t addr, uint32_t size) {
|
void LocalMem::write(const void* data, uint64_t addr, uint32_t size) {
|
||||||
impl_->write(data, addr, size);
|
impl_->write(data, addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LocalMem::tick() {
|
void LocalMem::tick() {
|
||||||
impl_->tick();
|
impl_->tick();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a reference to the counters held by the implementation object.
const LocalMem::PerfStats& LocalMem::perf_stats() const {
  return this->impl_->perf_stats();
}
|
|
@ -32,105 +32,105 @@ using namespace vortex;
|
||||||
|
|
||||||
// Private implementation of MemSim: feeds requests from the owner's
// MemReqPort into a ramulator DDR4 model and pushes read completions to the
// owner's MemRspPort.
class MemSim::Impl {
private:
  MemSim* simobject_;            // owning object; provides the ports
  Config config_;
  PerfStats perf_stats_;
  ramulator::Gem5Wrapper* dram_; // created in the constructor, owned here

public:

  Impl(MemSim* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
  {
    // Fixed DDR4 setup; only channel and core counts come from `config`.
    ramulator::Config ram_config;
    ram_config.add("standard", "DDR4");
    ram_config.add("channels", std::to_string(config.channels));
    ram_config.add("ranks", "1");
    ram_config.add("speed", "DDR4_2400R");
    ram_config.add("org", "DDR4_4Gb_x8");
    ram_config.add("mapping", "defaultmapping");
    ram_config.set_core_num(config.num_cores);
    dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
    Stats::statlist.output("ramulator.ddr4.log");
  }

  ~Impl() {
    // Finish the model and print its statistics before releasing it.
    dram_->finish();
    Stats::statlist.printall();
    delete dram_;
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }

  // Completion callback handed to ramulator with every request.
  // Only non-write requests produce a response on MemRspPort.
  void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) {
    if (req.type != ramulator::Request::Type::WRITE) {
      MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
      simobject_->MemRspPort.push(mem_rsp, 1);
      DT(3, simobject_->name() << "-" << mem_rsp);
    }
  }

  // Clears the read/write counters.
  void reset() {
    perf_stats_ = PerfStats();
  }

  void tick() {
    // Advance the DRAM model. A positive MEM_CYCLE_RATIO ticks it once
    // every MEM_CYCLE_RATIO simulator cycles; otherwise it is ticked
    // (1 - MEM_CYCLE_RATIO) times per simulator cycle.
    if (MEM_CYCLE_RATIO > 0) {
      auto cycle = SimPlatform::instance().cycles();
      if ((cycle % MEM_CYCLE_RATIO) == 0)
        dram_->tick();
    } else {
      for (int i = MEM_CYCLE_RATIO; i <= 0; ++i)
        dram_->tick();
    }

    auto& req_port = simobject_->MemReqPort;
    if (req_port.empty())
      return;

    auto& mem_req = req_port.front();

    // Tag and uuid are copied now because the request is popped from the
    // port before the completion callback runs.
    const auto tag = mem_req.tag;
    const auto uuid = mem_req.uuid;
    const auto req_type = mem_req.write ? ramulator::Request::Type::WRITE
                                        : ramulator::Request::Type::READ;

    ramulator::Request dram_req(
      mem_req.addr,
      req_type,
      [this, tag, uuid](ramulator::Request& done) {
        this->dram_callback(done, tag, uuid);
      },
      mem_req.cid
    );

    // The model refused the request: keep it queued and retry next tick.
    if (!dram_->send(dram_req))
      return;

    ++(mem_req.write ? perf_stats_.writes : perf_stats_.reads);

    DT(3, simobject_->name() << "-" << mem_req);

    req_port.pop();
  }
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Creates the request/response ports and the private implementation object,
// which receives a pointer back to this instance.
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<MemSim>(ctx, name)
  , MemReqPort(this)
  , MemRspPort(this)
  , impl_(new Impl(this, config))
{}
|
||||||
|
|
||||||
// Releases the implementation object allocated by the constructor.
MemSim::~MemSim() {
  delete this->impl_;
}
|
||||||
|
|
||||||
void MemSim::reset() {
|
void MemSim::reset() {
|
||||||
impl_->reset();
|
impl_->reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemSim::tick() {
|
void MemSim::tick() {
|
||||||
impl_->tick();
|
impl_->tick();
|
||||||
}
|
}
|
|
@ -20,36 +20,36 @@ namespace vortex {
|
||||||
|
|
||||||
class MemSim : public SimObject<MemSim>{
|
class MemSim : public SimObject<MemSim>{
|
||||||
public:
|
public:
|
||||||
struct Config {
|
struct Config {
|
||||||
uint32_t channels;
|
uint32_t channels;
|
||||||
uint32_t num_cores;
|
uint32_t num_cores;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PerfStats {
|
struct PerfStats {
|
||||||
uint64_t reads;
|
uint64_t reads;
|
||||||
uint64_t writes;
|
uint64_t writes;
|
||||||
|
|
||||||
PerfStats()
|
PerfStats()
|
||||||
: reads(0)
|
: reads(0)
|
||||||
, writes(0)
|
, writes(0)
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
SimPort<MemReq> MemReqPort;
|
SimPort<MemReq> MemReqPort;
|
||||||
SimPort<MemRsp> MemRspPort;
|
SimPort<MemRsp> MemRspPort;
|
||||||
|
|
||||||
MemSim(const SimContext& ctx, const char* name, const Config& config);
|
MemSim(const SimContext& ctx, const char* name, const Config& config);
|
||||||
~MemSim();
|
~MemSim();
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
void tick();
|
void tick();
|
||||||
|
|
||||||
const PerfStats& perf_stats() const;
|
const PerfStats& perf_stats() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Impl;
|
class Impl;
|
||||||
Impl* impl_;
|
Impl* impl_;
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
|
@ -23,37 +23,37 @@ public:
|
||||||
SimPort<instr_trace_t*> Output;
|
SimPort<instr_trace_t*> Output;
|
||||||
|
|
||||||
Operand(const SimContext& ctx)
|
Operand(const SimContext& ctx)
|
||||||
: SimObject<Operand>(ctx, "Operand")
|
: SimObject<Operand>(ctx, "Operand")
|
||||||
, Input(this)
|
, Input(this)
|
||||||
, Output(this)
|
, Output(this)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
virtual ~Operand() {}
|
virtual ~Operand() {}
|
||||||
|
|
||||||
virtual void reset() {}
|
virtual void reset() {}
|
||||||
|
|
||||||
virtual void tick() {
|
virtual void tick() {
|
||||||
if (Input.empty())
|
if (Input.empty())
|
||||||
return;
|
return;
|
||||||
auto trace = Input.front();
|
auto trace = Input.front();
|
||||||
|
|
||||||
int delay = 1;
|
int delay = 1;
|
||||||
for (int i = 0; i < MAX_NUM_REGS; ++i) {
|
for (int i = 0; i < MAX_NUM_REGS; ++i) {
|
||||||
bool is_iregs = trace->used_iregs.test(i);
|
bool is_iregs = trace->used_iregs.test(i);
|
||||||
bool is_fregs = trace->used_fregs.test(i);
|
bool is_fregs = trace->used_fregs.test(i);
|
||||||
bool is_vregs = trace->used_vregs.test(i);
|
bool is_vregs = trace->used_vregs.test(i);
|
||||||
if (is_iregs || is_fregs || is_vregs) {
|
if (is_iregs || is_fregs || is_vregs) {
|
||||||
if (is_iregs && i == 0)
|
if (is_iregs && i == 0)
|
||||||
continue;
|
continue;
|
||||||
++delay;
|
++delay;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Output.push(trace, delay);
|
Output.push(trace, delay);
|
||||||
|
|
||||||
DT(3, "pipeline-operands: " << *trace);
|
DT(3, "pipeline-operands: " << *trace);
|
||||||
|
|
||||||
Input.pop();
|
Input.pop();
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -22,98 +22,98 @@ namespace vortex {
|
||||||
class Scoreboard {
|
class Scoreboard {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
struct reg_use_t {
|
struct reg_use_t {
|
||||||
RegType reg_type;
|
RegType reg_type;
|
||||||
uint32_t reg_id;
|
uint32_t reg_id;
|
||||||
FUType fu_type;
|
FUType fu_type;
|
||||||
SfuType sfu_type;
|
SfuType sfu_type;
|
||||||
uint64_t uuid;
|
uint64_t uuid;
|
||||||
};
|
};
|
||||||
|
|
||||||
Scoreboard(const Arch &arch)
|
Scoreboard(const Arch &arch)
|
||||||
: in_use_iregs_(arch.num_warps())
|
: in_use_iregs_(arch.num_warps())
|
||||||
, in_use_fregs_(arch.num_warps())
|
, in_use_fregs_(arch.num_warps())
|
||||||
{
|
{
|
||||||
this->clear();
|
this->clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
||||||
in_use_iregs_.at(i).reset();
|
in_use_iregs_.at(i).reset();
|
||||||
in_use_fregs_.at(i).reset();
|
in_use_fregs_.at(i).reset();
|
||||||
}
|
}
|
||||||
owners_.clear();
|
owners_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool in_use(instr_trace_t* trace) const {
|
bool in_use(instr_trace_t* trace) const {
|
||||||
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|
||||||
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
|
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
|
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
|
||||||
std::vector<reg_use_t> out;
|
std::vector<reg_use_t> out;
|
||||||
|
|
||||||
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
|
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
|
||||||
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
|
auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid);
|
||||||
|
|
||||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||||
if (used_iregs.test(r)) {
|
if (used_iregs.test(r)) {
|
||||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
||||||
auto owner = owners_.at(tag);
|
auto owner = owners_.at(tag);
|
||||||
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) {
|
||||||
if (used_fregs.test(r)) {
|
if (used_fregs.test(r)) {
|
||||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
||||||
auto owner = owners_.at(tag);
|
auto owner = owners_.at(tag);
|
||||||
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reserve(instr_trace_t* trace) {
|
void reserve(instr_trace_t* trace) {
|
||||||
assert(trace->wb);
|
assert(trace->wb);
|
||||||
switch (trace->rdest_type) {
|
switch (trace->rdest_type) {
|
||||||
case RegType::Integer:
|
case RegType::Integer:
|
||||||
in_use_iregs_.at(trace->wid).set(trace->rdest);
|
in_use_iregs_.at(trace->wid).set(trace->rdest);
|
||||||
break;
|
break;
|
||||||
case RegType::Float:
|
case RegType::Float:
|
||||||
in_use_fregs_.at(trace->wid).set(trace->rdest);
|
in_use_fregs_.at(trace->wid).set(trace->rdest);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||||
assert(owners_.count(tag) == 0);
|
assert(owners_.count(tag) == 0);
|
||||||
owners_[tag] = trace;
|
owners_[tag] = trace;
|
||||||
assert((int)trace->fu_type < 5);
|
assert((int)trace->fu_type < 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
void release(instr_trace_t* trace) {
|
void release(instr_trace_t* trace) {
|
||||||
assert(trace->wb);
|
assert(trace->wb);
|
||||||
switch (trace->rdest_type) {
|
switch (trace->rdest_type) {
|
||||||
case RegType::Integer:
|
case RegType::Integer:
|
||||||
in_use_iregs_.at(trace->wid).reset(trace->rdest);
|
in_use_iregs_.at(trace->wid).reset(trace->rdest);
|
||||||
break;
|
break;
|
||||||
case RegType::Float:
|
case RegType::Float:
|
||||||
in_use_fregs_.at(trace->wid).reset(trace->rdest);
|
in_use_fregs_.at(trace->wid).reset(trace->rdest);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||||
owners_.erase(tag);
|
owners_.erase(tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
std::vector<RegMask> in_use_iregs_;
|
std::vector<RegMask> in_use_iregs_;
|
||||||
std::vector<RegMask> in_use_fregs_;
|
std::vector<RegMask> in_use_fregs_;
|
||||||
std::unordered_map<uint32_t, instr_trace_t*> owners_;
|
std::unordered_map<uint32_t, instr_trace_t*> owners_;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue