mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
simx refactoring: simobject::push(), instr_trace, FUtype, pending_instrs_
This commit is contained in:
parent
3ec37c6c40
commit
ff6f33acff
20 changed files with 317 additions and 324 deletions
|
@ -57,8 +57,6 @@ public:
|
|||
, tx_cb_(nullptr)
|
||||
{}
|
||||
|
||||
void send(const Pkt& pkt, uint64_t delay = 1) const;
|
||||
|
||||
void bind(SimPort<Pkt>* peer) {
|
||||
assert(peer_ == nullptr);
|
||||
peer_ = peer;
|
||||
|
@ -88,13 +86,7 @@ public:
|
|||
return queue_.front().pkt;
|
||||
}
|
||||
|
||||
const Pkt& back() const {
|
||||
return queue_.back();
|
||||
}
|
||||
|
||||
Pkt& back() {
|
||||
return queue_.back().pkt;
|
||||
}
|
||||
void push(const Pkt& pkt, uint64_t delay = 1) const;
|
||||
|
||||
uint64_t pop() {
|
||||
auto cycles = queue_.front().cycles;
|
||||
|
@ -122,12 +114,12 @@ protected:
|
|||
SimPort* peer_;
|
||||
TxCallback tx_cb_;
|
||||
|
||||
void push(const Pkt& data, uint64_t cycles) {
|
||||
void transfer(const Pkt& data, uint64_t cycles) {
|
||||
if (tx_cb_) {
|
||||
tx_cb_(data, cycles);
|
||||
}
|
||||
if (peer_) {
|
||||
peer_->push(data, cycles);
|
||||
peer_->transfer(data, cycles);
|
||||
} else {
|
||||
queue_.push({data, cycles});
|
||||
}
|
||||
|
@ -199,7 +191,7 @@ template <typename Pkt>
|
|||
class SimPortEvent : public SimEventBase {
|
||||
public:
|
||||
void fire() const override {
|
||||
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, cycles_);
|
||||
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
|
||||
}
|
||||
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
|
@ -409,9 +401,9 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
|
|||
}
|
||||
|
||||
template <typename Pkt>
|
||||
void SimPort<Pkt>::send(const Pkt& pkt, uint64_t delay) const {
|
||||
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
|
||||
if (peer_ && !tx_cb_) {
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->send(pkt, delay);
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
|
||||
} else {
|
||||
SimPlatform::instance().schedule(this, pkt, delay);
|
||||
}
|
||||
|
|
|
@ -492,7 +492,7 @@ private:
|
|||
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
|
||||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
|
||||
|
@ -502,13 +502,13 @@ private:
|
|||
{
|
||||
MemReq mem_req(core_req);
|
||||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
bypass_switch_->ReqIn.at(1).send(mem_req, 1);
|
||||
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
}
|
||||
|
||||
if (core_req.write && config_.write_reponse) {
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
@ -538,7 +538,7 @@ private:
|
|||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
@ -582,7 +582,7 @@ private:
|
|||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
} else {
|
||||
// mark line as dirty
|
||||
|
@ -595,7 +595,7 @@ private:
|
|||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
@ -614,7 +614,7 @@ private:
|
|||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_line.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
|
@ -628,7 +628,7 @@ private:
|
|||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
}
|
||||
// send core response
|
||||
|
@ -637,7 +637,7 @@ private:
|
|||
if (!info.valid)
|
||||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
}
|
||||
}
|
||||
|
@ -656,7 +656,7 @@ private:
|
|||
mem_req.tag = mshr_id;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
|
|
|
@ -49,11 +49,9 @@ Core::Core(const SimContext& ctx,
|
|||
, ibuffers_(arch.num_warps(), IBUF_SIZE)
|
||||
, scoreboard_(arch_)
|
||||
, operands_(ISSUE_WIDTH)
|
||||
, dispatchers_((uint32_t)ExeType::ExeTypeCount)
|
||||
, exe_units_((uint32_t)ExeType::ExeTypeCount)
|
||||
, dispatchers_((uint32_t)FUType::Count)
|
||||
, exe_units_((uint32_t)FUType::Count)
|
||||
, lmem_demuxs_(NUM_LSU_LANES)
|
||||
, fetch_latch_("fetch")
|
||||
, decode_latch_("decode")
|
||||
, pending_icache_(arch_.num_warps())
|
||||
, csrs_(arch.num_warps())
|
||||
, commit_arbs_(ISSUE_WIDTH)
|
||||
|
@ -95,22 +93,22 @@ Core::Core(const SimContext& ctx,
|
|||
}
|
||||
|
||||
// initialize dispatchers
|
||||
dispatchers_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_ALU_BLOCKS, NUM_ALU_LANES);
|
||||
dispatchers_.at((int)ExeType::FPU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES);
|
||||
dispatchers_.at((int)ExeType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_LSU_LANES);
|
||||
dispatchers_.at((int)ExeType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_SFU_LANES);
|
||||
dispatchers_.at((int)FUType::ALU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_ALU_BLOCKS, NUM_ALU_LANES);
|
||||
dispatchers_.at((int)FUType::FPU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES);
|
||||
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_LSU_LANES);
|
||||
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_SFU_LANES);
|
||||
|
||||
// initialize execute units
|
||||
exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
|
||||
exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().create_object<LsuUnit>(this);
|
||||
exe_units_.at((int)ExeType::SFU) = SimPlatform::instance().create_object<SfuUnit>(this);
|
||||
exe_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
|
||||
exe_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
|
||||
exe_units_.at((int)FUType::LSU) = SimPlatform::instance().create_object<LsuUnit>(this);
|
||||
exe_units_.at((int)FUType::SFU) = SimPlatform::instance().create_object<SfuUnit>(this);
|
||||
|
||||
// bind commit arbiters
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
|
||||
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)ExeType::ExeTypeCount, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)ExeType::ExeTypeCount; ++j) {
|
||||
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
|
||||
exe_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
|
||||
}
|
||||
commit_arbs_.at(i) = arbiter;
|
||||
|
@ -157,8 +155,7 @@ void Core::reset() {
|
|||
decode_latch_.clear();
|
||||
pending_icache_.clear();
|
||||
stalled_warps_.reset();
|
||||
issued_instrs_ = 0;
|
||||
committed_instrs_ = 0;
|
||||
pending_instrs_ = 0;
|
||||
exited_ = false;
|
||||
perf_stats_ = PerfStats();
|
||||
pending_ifetches_ = 0;
|
||||
|
@ -204,7 +201,7 @@ void Core::schedule() {
|
|||
|
||||
// advance to fetch stage
|
||||
fetch_latch_.push(trace);
|
||||
++issued_instrs_;
|
||||
++pending_instrs_;
|
||||
}
|
||||
|
||||
void Core::fetch() {
|
||||
|
@ -232,7 +229,7 @@ void Core::fetch() {
|
|||
mem_req.tag = pending_icache_.allocate(trace);
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
icache_req_ports.at(0).send(mem_req, 2);
|
||||
icache_req_ports.at(0).push(mem_req, 2);
|
||||
DT(3, "icache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace);
|
||||
fetch_latch_.pop();
|
||||
++perf_stats_.ifetches;
|
||||
|
@ -278,7 +275,7 @@ void Core::issue() {
|
|||
if (operand->Output.empty())
|
||||
continue;
|
||||
auto trace = operand->Output.front();
|
||||
if (dispatchers_.at((int)trace->exe_type)->push(i, trace)) {
|
||||
if (dispatchers_.at((int)trace->fu_type)->push(i, trace)) {
|
||||
operand->Output.pop();
|
||||
trace->log_once(false);
|
||||
} else {
|
||||
|
@ -312,11 +309,11 @@ void Core::issue() {
|
|||
}
|
||||
for (uint32_t j = 0, n = uses.size(); j < n; ++j) {
|
||||
auto& use = uses.at(j);
|
||||
switch (use.exe_type) {
|
||||
case ExeType::ALU: ++perf_stats_.scrb_alu; break;
|
||||
case ExeType::FPU: ++perf_stats_.scrb_fpu; break;
|
||||
case ExeType::LSU: ++perf_stats_.scrb_lsu; break;
|
||||
case ExeType::SFU: {
|
||||
switch (use.fu_type) {
|
||||
case FUType::ALU: ++perf_stats_.scrb_alu; break;
|
||||
case FUType::FPU: ++perf_stats_.scrb_fpu; break;
|
||||
case FUType::LSU: ++perf_stats_.scrb_lsu; break;
|
||||
case FUType::SFU: {
|
||||
++perf_stats_.scrb_sfu;
|
||||
switch (use.sfu_type) {
|
||||
case SfuType::TMC:
|
||||
|
@ -348,7 +345,7 @@ void Core::issue() {
|
|||
DT(3, "pipeline-scoreboard: " << *trace);
|
||||
|
||||
// to operand stage
|
||||
operands_.at(i)->Input.send(trace, 1);
|
||||
operands_.at(i)->Input.push(trace, 1);
|
||||
|
||||
ibuffer.pop();
|
||||
}
|
||||
|
@ -356,14 +353,14 @@ void Core::issue() {
|
|||
}
|
||||
|
||||
void Core::execute() {
|
||||
for (uint32_t i = 0; i < (uint32_t)ExeType::ExeTypeCount; ++i) {
|
||||
for (uint32_t i = 0; i < (uint32_t)FUType::Count; ++i) {
|
||||
auto& dispatch = dispatchers_.at(i);
|
||||
auto& exe_unit = exe_units_.at(i);
|
||||
for (uint32_t j = 0; j < ISSUE_WIDTH; ++j) {
|
||||
if (dispatch->Outputs.at(j).empty())
|
||||
continue;
|
||||
auto trace = dispatch->Outputs.at(j).front();
|
||||
exe_unit->Inputs.at(j).send(trace, 1);
|
||||
exe_unit->Inputs.at(j).push(trace, 1);
|
||||
dispatch->Outputs.at(j).pop();
|
||||
}
|
||||
}
|
||||
|
@ -387,8 +384,7 @@ void Core::commit() {
|
|||
scoreboard_.release(trace);
|
||||
}
|
||||
|
||||
assert(committed_instrs_ <= issued_instrs_);
|
||||
++committed_instrs_;
|
||||
--pending_instrs_;
|
||||
|
||||
perf_stats_.instrs += trace->tmask.count();
|
||||
}
|
||||
|
@ -743,7 +739,7 @@ bool Core::check_exit(Word* exitcode, bool riscv_test) const {
|
|||
}
|
||||
|
||||
bool Core::running() const {
|
||||
return (committed_instrs_ != issued_instrs_);
|
||||
return (pending_instrs_ != 0);
|
||||
}
|
||||
|
||||
void Core::resume() {
|
||||
|
|
|
@ -42,7 +42,7 @@ namespace vortex {
|
|||
|
||||
class Socket;
|
||||
|
||||
using TraceSwitch = Mux<pipeline_trace_t*>;
|
||||
using TraceSwitch = Mux<instr_trace_t*>;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
|
@ -187,11 +187,10 @@ private:
|
|||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
||||
HashTable<pipeline_trace_t*> pending_icache_;
|
||||
HashTable<instr_trace_t*> pending_icache_;
|
||||
WarpMask active_warps_;
|
||||
WarpMask stalled_warps_;
|
||||
uint64_t issued_instrs_;
|
||||
uint64_t committed_instrs_;
|
||||
uint64_t pending_instrs_;
|
||||
bool exited_;
|
||||
|
||||
uint64_t pending_ifetches_;
|
||||
|
|
|
@ -13,21 +13,21 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include "instr_trace.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Dispatcher : public SimObject<Dispatcher> {
|
||||
public:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
std::vector<SimPort<instr_trace_t*>> Outputs;
|
||||
|
||||
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
|
||||
: SimObject<Dispatcher>(ctx, "Dispatcher")
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, Inputs_(ISSUE_WIDTH, this)
|
||||
, arch_(arch)
|
||||
, queues_(ISSUE_WIDTH, std::queue<pipeline_trace_t*>())
|
||||
, queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
|
||||
, buf_size_(buf_size)
|
||||
, block_size_(block_size)
|
||||
, num_lanes_(num_lanes)
|
||||
|
@ -52,7 +52,7 @@ public:
|
|||
if (queue.empty())
|
||||
continue;
|
||||
auto trace = queue.front();
|
||||
Inputs_.at(i).send(trace, 1);
|
||||
Inputs_.at(i).push(trace, 1);
|
||||
queue.pop();
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,7 @@ public:
|
|||
start /= num_lanes_;
|
||||
end /= num_lanes_;
|
||||
if (start != end) {
|
||||
new_trace = new pipeline_trace_t(*trace);
|
||||
new_trace = new instr_trace_t(*trace);
|
||||
new_trace->eop = false;
|
||||
start_p_.at(b) = start + 1;
|
||||
} else {
|
||||
|
@ -105,7 +105,7 @@ public:
|
|||
++block_sent;
|
||||
}
|
||||
DT(3, "pipeline-dispatch: " << *new_trace);
|
||||
output.send(new_trace, 1);
|
||||
output.push(new_trace, 1);
|
||||
}
|
||||
if (block_sent == block_size_) {
|
||||
batch_idx_ = (batch_idx_ + 1) % batch_count_;
|
||||
|
@ -115,7 +115,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
bool push(uint32_t issue_index, pipeline_trace_t* trace) {
|
||||
bool push(uint32_t issue_index, instr_trace_t* trace) {
|
||||
auto& queue = queues_.at(issue_index);
|
||||
if (queue.size() >= buf_size_)
|
||||
return false;
|
||||
|
@ -124,9 +124,9 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs_;
|
||||
std::vector<SimPort<instr_trace_t*>> Inputs_;
|
||||
const Arch& arch_;
|
||||
std::vector<std::queue<pipeline_trace_t*>> queues_;
|
||||
std::vector<std::queue<instr_trace_t*>> queues_;
|
||||
uint32_t buf_size_;
|
||||
uint32_t block_size_;
|
||||
uint32_t num_lanes_;
|
||||
|
|
|
@ -38,10 +38,10 @@ void AluUnit::tick() {
|
|||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::IMUL:
|
||||
output.send(trace, LATENCY_IMUL+1);
|
||||
output.push(trace, LATENCY_IMUL+1);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
output.send(trace, XLEN+1);
|
||||
output.push(trace, XLEN+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
|
@ -68,19 +68,19 @@ void FpuUnit::tick() {
|
|||
auto trace = input.front();
|
||||
switch (trace->fpu_type) {
|
||||
case FpuType::FNCP:
|
||||
output.send(trace, 2);
|
||||
output.push(trace, 2);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
output.send(trace, LATENCY_FMA+1);
|
||||
output.push(trace, LATENCY_FMA+1);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
output.send(trace, LATENCY_FDIV+1);
|
||||
output.push(trace, LATENCY_FDIV+1);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
output.send(trace, LATENCY_FSQRT+1);
|
||||
output.push(trace, LATENCY_FSQRT+1);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
output.send(trace, LATENCY_FCVT+1);
|
||||
output.push(trace, LATENCY_FCVT+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
|
@ -125,7 +125,7 @@ void LsuUnit::tick() {
|
|||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
output.push(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
|
@ -146,7 +146,7 @@ void LsuUnit::tick() {
|
|||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
output.push(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
lmem_rsp_port.pop();
|
||||
|
@ -159,7 +159,7 @@ void LsuUnit::tick() {
|
|||
return;
|
||||
int iw = fence_state_->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(fence_state_, 1);
|
||||
output.push(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
DT(3, "fence-unlock: " << fence_state_);
|
||||
}
|
||||
|
@ -240,7 +240,7 @@ void LsuUnit::tick() {
|
|||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
dcache_req_port.send(mem_req, 1);
|
||||
dcache_req_port.push(mem_req, 1);
|
||||
DT(3, "dcache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", tid=" << t << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
||||
|
@ -257,7 +257,7 @@ void LsuUnit::tick() {
|
|||
// do not wait on writes
|
||||
if (is_write) {
|
||||
pending_rd_reqs_.release(tag);
|
||||
output.send(trace, 1);
|
||||
output.push(trace, 1);
|
||||
}
|
||||
|
||||
// remove input
|
||||
|
@ -296,10 +296,10 @@ void SfuUnit::tick() {
|
|||
case SfuType::CSRRW:
|
||||
case SfuType::CSRRS:
|
||||
case SfuType::CSRRC:
|
||||
output.send(trace, 1);
|
||||
output.push(trace, 1);
|
||||
break;
|
||||
case SfuType::BAR: {
|
||||
output.send(trace, 1);
|
||||
output.push(trace, 1);
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
if (trace->eop) {
|
||||
core_->barrier(trace_data->bar.id, trace_data->bar.count, trace->wid);
|
||||
|
@ -307,7 +307,7 @@ void SfuUnit::tick() {
|
|||
release_warp = false;
|
||||
} break;
|
||||
case SfuType::CMOV:
|
||||
output.send(trace, 3);
|
||||
output.push(trace, 3);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
|
|
|
@ -14,8 +14,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "pipeline.h"
|
||||
#include "cache_sim.h"
|
||||
#include "instr_trace.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
|
@ -23,8 +22,8 @@ class Core;
|
|||
|
||||
class ExeUnit : public SimObject<ExeUnit> {
|
||||
public:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs;
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
std::vector<SimPort<instr_trace_t*>> Inputs;
|
||||
std::vector<SimPort<instr_trace_t*>> Outputs;
|
||||
|
||||
ExeUnit(const SimContext& ctx, Core* core, const char* name)
|
||||
: SimObject<ExeUnit>(ctx, name)
|
||||
|
@ -73,12 +72,12 @@ public:
|
|||
|
||||
private:
|
||||
struct pending_req_t {
|
||||
pipeline_trace_t* trace;
|
||||
instr_trace_t* trace;
|
||||
uint32_t count;
|
||||
};
|
||||
HashTable<pending_req_t> pending_rd_reqs_;
|
||||
uint32_t num_lanes_;
|
||||
pipeline_trace_t* fence_state_;
|
||||
instr_trace_t* fence_state_;
|
||||
uint64_t pending_loads_;
|
||||
bool fence_lock_;
|
||||
uint32_t input_idx_;
|
||||
|
|
|
@ -66,7 +66,7 @@ inline int64_t check_boxing(int64_t a) {
|
|||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
|
||||
void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
void Warp::execute(const Instr &instr, instr_trace_t *trace) {
|
||||
assert(tmask_.any());
|
||||
|
||||
auto next_pc = PC_ + 4;
|
||||
|
@ -136,7 +136,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
switch (opcode) {
|
||||
case Opcode::LUI: {
|
||||
// RV32I: LUI
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
|
@ -148,7 +148,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::AUIPC: {
|
||||
// RV32I: AUIPC
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
|
@ -159,7 +159,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::R: {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -320,7 +320,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::I: {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
|
@ -380,7 +380,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::R_W: {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -507,7 +507,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::I_W: {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
|
@ -550,7 +550,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::B: {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::BRANCH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -610,7 +610,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::JAL: {
|
||||
// RV32I: JAL
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::BRANCH;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
|
@ -624,7 +624,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::JALR: {
|
||||
// RV32I: JALR
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::BRANCH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
|
@ -639,7 +639,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::L:
|
||||
case Opcode::FL: {
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::LOAD;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
|
@ -683,7 +683,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::S:
|
||||
case Opcode::FS: {
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::STORE;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -710,7 +710,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
case Opcode::AMO: {
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::LOAD;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -790,7 +790,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
uint32_t csr_addr = immsrc;
|
||||
uint32_t csr_value;
|
||||
if (func3 == 0) {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->fu_type = FUType::ALU;
|
||||
trace->alu_type = AluType::SYSCALL;
|
||||
trace->fetch_stall = true;
|
||||
switch (csr_addr) {
|
||||
|
@ -810,7 +810,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} else {
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->fetch_stall = true;
|
||||
csr_value = core_->get_csr(csr_addr, t, warp_id_);
|
||||
switch (func3) {
|
||||
|
@ -882,12 +882,12 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
}
|
||||
case Opcode::FENCE: {
|
||||
// RV32I: FENCE
|
||||
trace->exe_type = ExeType::LSU;
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::FENCE;
|
||||
break;
|
||||
}
|
||||
case Opcode::FCI: {
|
||||
trace->exe_type = ExeType::FPU;
|
||||
trace->fu_type = FUType::FPU;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
continue;
|
||||
|
@ -1271,7 +1271,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
switch (func3) {
|
||||
case 0: {
|
||||
// TMC
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::TMC;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
@ -1282,7 +1282,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
} break;
|
||||
case 1: {
|
||||
// WSPAWN
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::WSPAWN;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -1291,7 +1291,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
} break;
|
||||
case 2: {
|
||||
// SPLIT
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::SPLIT;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
@ -1324,7 +1324,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
} break;
|
||||
case 3: {
|
||||
// JOIN
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::JOIN;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
@ -1344,7 +1344,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
} break;
|
||||
case 4: {
|
||||
// BAR
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::BAR;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -1353,7 +1353,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
} break;
|
||||
case 5: {
|
||||
// PRED
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::PRED;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
@ -1381,7 +1381,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||
case 1:
|
||||
switch (func2) {
|
||||
case 0: { // CMOV
|
||||
trace->exe_type = ExeType::SFU;
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::CMOV;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include "instr_trace.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
@ -32,11 +32,11 @@ public:
|
|||
return (entries_.size() == capacity_);
|
||||
}
|
||||
|
||||
pipeline_trace_t* top() const {
|
||||
instr_trace_t* top() const {
|
||||
return entries_.front();
|
||||
}
|
||||
|
||||
void push(pipeline_trace_t* trace) {
|
||||
void push(instr_trace_t* trace) {
|
||||
entries_.emplace(trace);
|
||||
}
|
||||
|
||||
|
@ -45,12 +45,12 @@ public:
|
|||
}
|
||||
|
||||
void clear() {
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::queue<instr_trace_t*> empty;
|
||||
std::swap(entries_, empty );
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<pipeline_trace_t*> entries_;
|
||||
std::queue<instr_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
};
|
||||
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
namespace vortex {
|
||||
|
||||
class Warp;
|
||||
|
||||
enum class Opcode {
|
||||
NONE = 0,
|
||||
R = 0x33,
|
||||
|
|
170
sim/simx/instr_trace.h
Normal file
170
sim/simx/instr_trace.h
Normal file
|
@ -0,0 +1,170 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "arch.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ITraceData {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<ITraceData>;
|
||||
ITraceData() {}
|
||||
virtual ~ITraceData() {}
|
||||
};
|
||||
|
||||
struct LsuTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<LsuTraceData>;
|
||||
std::vector<mem_addr_size_t> mem_addrs;
|
||||
LsuTraceData(uint32_t num_threads) : mem_addrs(num_threads) {}
|
||||
};
|
||||
|
||||
struct SFUTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<SFUTraceData>;
|
||||
struct {
|
||||
uint32_t id;
|
||||
uint32_t count;
|
||||
} bar;
|
||||
SFUTraceData(uint32_t bar_id, uint32_t bar_count) : bar{bar_id, bar_count} {}
|
||||
};
|
||||
|
||||
struct instr_trace_t {
|
||||
public:
|
||||
//--
|
||||
const uint64_t uuid;
|
||||
const Arch& arch;
|
||||
|
||||
//--
|
||||
uint32_t cid;
|
||||
uint32_t wid;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
uint32_t rdest;
|
||||
RegType rdest_type;
|
||||
bool wb;
|
||||
|
||||
//--
|
||||
RegMask used_iregs;
|
||||
RegMask used_fregs;
|
||||
RegMask used_vregs;
|
||||
|
||||
//-
|
||||
FUType fu_type;
|
||||
|
||||
//--
|
||||
union {
|
||||
uint32_t unit_type;
|
||||
LsuType lsu_type;
|
||||
AluType alu_type;
|
||||
FpuType fpu_type;
|
||||
SfuType sfu_type;
|
||||
};
|
||||
|
||||
ITraceData::Ptr data;
|
||||
|
||||
int pid;
|
||||
bool sop;
|
||||
bool eop;
|
||||
|
||||
bool fetch_stall;
|
||||
|
||||
instr_trace_t(uint64_t uuid, const Arch& arch)
|
||||
: uuid(uuid)
|
||||
, arch(arch)
|
||||
, cid(0)
|
||||
, wid(0)
|
||||
, tmask(0)
|
||||
, PC(0)
|
||||
, rdest(0)
|
||||
, rdest_type(RegType::None)
|
||||
, wb(false)
|
||||
, used_iregs(0)
|
||||
, used_fregs(0)
|
||||
, used_vregs(0)
|
||||
, fu_type(FUType::ALU)
|
||||
, unit_type(0)
|
||||
, data(nullptr)
|
||||
, pid(-1)
|
||||
, sop(true)
|
||||
, eop(true)
|
||||
, fetch_stall(false)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
instr_trace_t(const instr_trace_t& rhs)
|
||||
: uuid(rhs.uuid)
|
||||
, arch(rhs.arch)
|
||||
, cid(rhs.cid)
|
||||
, wid(rhs.wid)
|
||||
, tmask(rhs.tmask)
|
||||
, PC(rhs.PC)
|
||||
, rdest(rhs.rdest)
|
||||
, rdest_type(rhs.rdest_type)
|
||||
, wb(rhs.wb)
|
||||
, used_iregs(rhs.used_iregs)
|
||||
, used_fregs(rhs.used_fregs)
|
||||
, used_vregs(rhs.used_vregs)
|
||||
, fu_type(rhs.fu_type)
|
||||
, unit_type(rhs.unit_type)
|
||||
, data(rhs.data)
|
||||
, pid(rhs.pid)
|
||||
, sop(rhs.sop)
|
||||
, eop(rhs.eop)
|
||||
, fetch_stall(rhs.fetch_stall)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
~instr_trace_t() {}
|
||||
|
||||
bool log_once(bool enable) {
|
||||
bool old = log_once_;
|
||||
log_once_ = enable;
|
||||
return old;
|
||||
}
|
||||
|
||||
private:
|
||||
bool log_once_;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const instr_trace_t& trace) {
|
||||
os << "cid=" << trace.cid;
|
||||
os << ", wid=" << trace.wid;
|
||||
os << ", tmask=";
|
||||
for (uint32_t i = 0, n = trace.arch.num_threads(); i < n; ++i) {
|
||||
os << trace.tmask.test(i);
|
||||
}
|
||||
os << ", PC=0x" << std::hex << trace.PC;
|
||||
os << ", wb=" << trace.wb;
|
||||
if (trace.wb) {
|
||||
os << ", rd=" << trace.rdest_type << std::dec << trace.rdest;
|
||||
}
|
||||
os << ", ex=" << trace.fu_type;
|
||||
if (trace.pid != -1) {
|
||||
os << ", pid=" << trace.pid;
|
||||
os << ", sop=" << trace.sop;
|
||||
os << ", eop=" << trace.eop;
|
||||
}
|
||||
os << " (#" << std::dec << trace.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
}
|
|
@ -87,7 +87,7 @@ public:
|
|||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid};
|
||||
simobject_->Outputs.at(req_id).send(core_rsp, 1);
|
||||
simobject_->Outputs.at(req_id).push(core_rsp, 1);
|
||||
}
|
||||
|
||||
// update perf counters
|
||||
|
|
|
@ -69,7 +69,7 @@ public:
|
|||
if (req.type == ramulator::Request::Type::WRITE)
|
||||
return;
|
||||
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
|
||||
simobject_->MemRspPort.send(mem_rsp, 1);
|
||||
simobject_->MemRspPort.push(mem_rsp, 1);
|
||||
DT(3, simobject_->name() << "-" << mem_rsp);
|
||||
}
|
||||
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include "instr_trace.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Operand : public SimObject<Operand> {
|
||||
public:
|
||||
SimPort<pipeline_trace_t*> Input;
|
||||
SimPort<pipeline_trace_t*> Output;
|
||||
SimPort<instr_trace_t*> Input;
|
||||
SimPort<instr_trace_t*> Output;
|
||||
|
||||
Operand(const SimContext& ctx)
|
||||
: SimObject<Operand>(ctx, "Operand")
|
||||
|
@ -50,7 +50,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Output.send(trace, delay);
|
||||
Output.push(trace, delay);
|
||||
|
||||
DT(3, "pipeline-operands: " << *trace);
|
||||
|
||||
|
|
|
@ -14,178 +14,24 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "arch.h"
|
||||
#include "debug.h"
|
||||
#include "instr_trace.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ITraceData {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<ITraceData>;
|
||||
ITraceData() {}
|
||||
virtual ~ITraceData() {}
|
||||
};
|
||||
|
||||
struct LsuTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<LsuTraceData>;
|
||||
std::vector<mem_addr_size_t> mem_addrs;
|
||||
LsuTraceData(uint32_t num_threads) : mem_addrs(num_threads) {}
|
||||
};
|
||||
|
||||
struct SFUTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<SFUTraceData>;
|
||||
struct {
|
||||
uint32_t id;
|
||||
uint32_t count;
|
||||
} bar;
|
||||
SFUTraceData(uint32_t bar_id, uint32_t bar_count) : bar{bar_id, bar_count} {}
|
||||
};
|
||||
|
||||
struct pipeline_trace_t {
|
||||
public:
|
||||
//--
|
||||
const uint64_t uuid;
|
||||
const Arch& arch;
|
||||
|
||||
//--
|
||||
uint32_t cid;
|
||||
uint32_t wid;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
uint32_t rdest;
|
||||
RegType rdest_type;
|
||||
bool wb;
|
||||
|
||||
//--
|
||||
RegMask used_iregs;
|
||||
RegMask used_fregs;
|
||||
RegMask used_vregs;
|
||||
|
||||
//-
|
||||
ExeType exe_type;
|
||||
|
||||
//--
|
||||
union {
|
||||
uint32_t unit_type;
|
||||
LsuType lsu_type;
|
||||
AluType alu_type;
|
||||
FpuType fpu_type;
|
||||
SfuType sfu_type;
|
||||
};
|
||||
|
||||
ITraceData::Ptr data;
|
||||
|
||||
int pid;
|
||||
bool sop;
|
||||
bool eop;
|
||||
|
||||
bool fetch_stall;
|
||||
|
||||
pipeline_trace_t(uint64_t uuid, const Arch& arch)
|
||||
: uuid(uuid)
|
||||
, arch(arch)
|
||||
, cid(0)
|
||||
, wid(0)
|
||||
, tmask(0)
|
||||
, PC(0)
|
||||
, rdest(0)
|
||||
, rdest_type(RegType::None)
|
||||
, wb(false)
|
||||
, used_iregs(0)
|
||||
, used_fregs(0)
|
||||
, used_vregs(0)
|
||||
, exe_type(ExeType::ALU)
|
||||
, unit_type(0)
|
||||
, data(nullptr)
|
||||
, pid(-1)
|
||||
, sop(true)
|
||||
, eop(true)
|
||||
, fetch_stall(false)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
pipeline_trace_t(const pipeline_trace_t& rhs)
|
||||
: uuid(rhs.uuid)
|
||||
, arch(rhs.arch)
|
||||
, cid(rhs.cid)
|
||||
, wid(rhs.wid)
|
||||
, tmask(rhs.tmask)
|
||||
, PC(rhs.PC)
|
||||
, rdest(rhs.rdest)
|
||||
, rdest_type(rhs.rdest_type)
|
||||
, wb(rhs.wb)
|
||||
, used_iregs(rhs.used_iregs)
|
||||
, used_fregs(rhs.used_fregs)
|
||||
, used_vregs(rhs.used_vregs)
|
||||
, exe_type(rhs.exe_type)
|
||||
, unit_type(rhs.unit_type)
|
||||
, data(rhs.data)
|
||||
, pid(rhs.pid)
|
||||
, sop(rhs.sop)
|
||||
, eop(rhs.eop)
|
||||
, fetch_stall(rhs.fetch_stall)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
~pipeline_trace_t() {}
|
||||
|
||||
bool log_once(bool enable) {
|
||||
bool old = log_once_;
|
||||
log_once_ = enable;
|
||||
return old;
|
||||
}
|
||||
|
||||
private:
|
||||
bool log_once_;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const pipeline_trace_t& state) {
|
||||
os << "cid=" << state.cid;
|
||||
os << ", wid=" << state.wid;
|
||||
os << ", tmask=";
|
||||
for (uint32_t i = 0, n = state.arch.num_threads(); i < n; ++i) {
|
||||
os << state.tmask.test(i);
|
||||
}
|
||||
os << ", PC=0x" << std::hex << state.PC;
|
||||
os << ", wb=" << state.wb;
|
||||
if (state.wb) {
|
||||
os << ", rd=" << state.rdest_type << std::dec << state.rdest;
|
||||
}
|
||||
os << ", ex=" << state.exe_type;
|
||||
if (state.pid != -1) {
|
||||
os << ", pid=" << state.pid;
|
||||
os << ", sop=" << state.sop;
|
||||
os << ", eop=" << state.eop;
|
||||
}
|
||||
os << " (#" << std::dec << state.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
class PipelineLatch {
|
||||
public:
|
||||
PipelineLatch(const char* name = nullptr)
|
||||
: name_(name)
|
||||
{}
|
||||
PipelineLatch() {}
|
||||
~PipelineLatch() {}
|
||||
|
||||
bool empty() const {
|
||||
return queue_.empty();
|
||||
}
|
||||
|
||||
pipeline_trace_t* front() {
|
||||
instr_trace_t* front() {
|
||||
return queue_.front();
|
||||
}
|
||||
|
||||
pipeline_trace_t* back() {
|
||||
return queue_.back();
|
||||
}
|
||||
|
||||
void push(pipeline_trace_t* value) {
|
||||
void push(instr_trace_t* value) {
|
||||
queue_.push(value);
|
||||
}
|
||||
|
||||
|
@ -194,13 +40,12 @@ public:
|
|||
}
|
||||
|
||||
void clear() {
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::swap(queue_, empty );
|
||||
std::queue<instr_trace_t*> empty;
|
||||
std::swap(queue_, empty);
|
||||
}
|
||||
|
||||
protected:
|
||||
const char* name_;
|
||||
std::queue<pipeline_trace_t*> queue_;
|
||||
std::queue<instr_trace_t*> queue_;
|
||||
};
|
||||
|
||||
}
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include "instr_trace.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
@ -24,7 +24,7 @@ public:
|
|||
struct reg_use_t {
|
||||
RegType reg_type;
|
||||
uint32_t reg_id;
|
||||
ExeType exe_type;
|
||||
FUType fu_type;
|
||||
SfuType sfu_type;
|
||||
uint64_t uuid;
|
||||
};
|
||||
|
@ -44,12 +44,12 @@ public:
|
|||
owners_.clear();
|
||||
}
|
||||
|
||||
bool in_use(pipeline_trace_t* trace) const {
|
||||
bool in_use(instr_trace_t* trace) const {
|
||||
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|
||||
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
|
||||
}
|
||||
|
||||
std::vector<reg_use_t> get_uses(pipeline_trace_t* trace) const {
|
||||
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
|
||||
std::vector<reg_use_t> out;
|
||||
|
||||
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
|
||||
|
@ -59,7 +59,7 @@ public:
|
|||
if (used_iregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Integer, r, owner->exe_type, owner->sfu_type, owner->uuid});
|
||||
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,14 +67,14 @@ public:
|
|||
if (used_fregs.test(r)) {
|
||||
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
|
||||
auto owner = owners_.at(tag);
|
||||
out.push_back({RegType::Float, r, owner->exe_type, owner->sfu_type, owner->uuid});
|
||||
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void reserve(pipeline_trace_t* trace) {
|
||||
void reserve(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
|
@ -89,10 +89,10 @@ public:
|
|||
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
|
||||
assert(owners_.count(tag) == 0);
|
||||
owners_[tag] = trace;
|
||||
assert((int)trace->exe_type < 5);
|
||||
assert((int)trace->fu_type < 5);
|
||||
}
|
||||
|
||||
void release(pipeline_trace_t* trace) {
|
||||
void release(instr_trace_t* trace) {
|
||||
assert(trace->wb);
|
||||
switch (trace->rdest_type) {
|
||||
case RegType::Integer:
|
||||
|
@ -112,7 +112,7 @@ private:
|
|||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::unordered_map<uint32_t, pipeline_trace_t*> owners_;
|
||||
std::unordered_map<uint32_t, instr_trace_t*> owners_;
|
||||
};
|
||||
|
||||
}
|
BIN
sim/simx/simx
Executable file
BIN
sim/simx/simx
Executable file
Binary file not shown.
|
@ -75,20 +75,20 @@ inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class ExeType {
|
||||
enum class FUType {
|
||||
ALU,
|
||||
LSU,
|
||||
FPU,
|
||||
SFU,
|
||||
ExeTypeCount
|
||||
Count
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const ExeType& type) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const FUType& type) {
|
||||
switch (type) {
|
||||
case ExeType::ALU: os << "ALU"; break;
|
||||
case ExeType::LSU: os << "LSU"; break;
|
||||
case ExeType::FPU: os << "FPU"; break;
|
||||
case ExeType::SFU: os << "SFU"; break;
|
||||
case FUType::ALU: os << "ALU"; break;
|
||||
case FUType::LSU: os << "LSU"; break;
|
||||
case FUType::FPU: os << "FPU"; break;
|
||||
case FUType::SFU: os << "SFU"; break;
|
||||
default: assert(false);
|
||||
}
|
||||
return os;
|
||||
|
@ -417,7 +417,7 @@ public:
|
|||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
Outputs.at(o).send(req, delay_);
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
break;
|
||||
|
@ -513,7 +513,7 @@ public:
|
|||
req.tag = (req.tag << lg_num_reqs_) | i;
|
||||
}
|
||||
DT(4, this->name() << "-" << req);
|
||||
ReqOut.at(o).send(req, delay_);
|
||||
ReqOut.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
break;
|
||||
|
@ -530,7 +530,7 @@ public:
|
|||
}
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
uint32_t j = o * R + i;
|
||||
RspIn.at(j).send(rsp, 1);
|
||||
RspIn.at(j).push(rsp, 1);
|
||||
RspOut.at(o).pop();
|
||||
}
|
||||
}
|
||||
|
@ -583,13 +583,13 @@ public:
|
|||
if (!RspSM.empty()) {
|
||||
auto& rsp = RspSM.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspIn.push(rsp, 1);
|
||||
RspSM.pop();
|
||||
}
|
||||
if (!RspDC.empty()) {
|
||||
auto& rsp = RspDC.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspIn.push(rsp, 1);
|
||||
RspDC
|
||||
.pop();
|
||||
}
|
||||
|
@ -598,9 +598,9 @@ public:
|
|||
auto& req = ReqIn.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
if (req.type == AddrType::Shared) {
|
||||
ReqSM.send(req, delay_);
|
||||
ReqSM.push(req, delay_);
|
||||
} else {
|
||||
ReqDC.send(req, delay_);
|
||||
ReqDC.push(req, delay_);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
|
|
|
@ -38,8 +38,7 @@ void Warp::reset() {
|
|||
#if (XLEN == 64)
|
||||
PC_ = (uint64_t(core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) | PC_;
|
||||
#endif
|
||||
tmask_.reset();
|
||||
issued_instrs_ = 0;
|
||||
tmask_.reset();
|
||||
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) {
|
||||
for (auto& reg : ireg_file_.at(i)) {
|
||||
reg = 0;
|
||||
|
@ -51,7 +50,7 @@ void Warp::reset() {
|
|||
uui_gen_.reset();
|
||||
}
|
||||
|
||||
pipeline_trace_t* Warp::eval() {
|
||||
instr_trace_t* Warp::eval() {
|
||||
assert(tmask_.any());
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
@ -83,7 +82,7 @@ pipeline_trace_t* Warp::eval() {
|
|||
DP(1, "Instr 0x" << std::hex << instr_code << ": " << *instr);
|
||||
|
||||
// Create trace
|
||||
auto trace = new pipeline_trace_t(uuid, arch_);
|
||||
auto trace = new instr_trace_t(uuid, arch_);
|
||||
trace->cid = core_->id();
|
||||
trace->wid = warp_id_;
|
||||
trace->PC = PC_;
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace vortex {
|
|||
class Arch;
|
||||
class Core;
|
||||
class Instr;
|
||||
class pipeline_trace_t;
|
||||
class instr_trace_t;
|
||||
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
|
@ -79,22 +79,17 @@ public:
|
|||
return ireg_file_.at(0).at(reg);
|
||||
}
|
||||
|
||||
uint64_t incr_instrs() {
|
||||
return issued_instrs_++;
|
||||
}
|
||||
|
||||
pipeline_trace_t* eval();
|
||||
instr_trace_t* eval();
|
||||
|
||||
private:
|
||||
|
||||
void execute(const Instr &instr, pipeline_trace_t *trace);
|
||||
void execute(const Instr &instr, instr_trace_t *trace);
|
||||
|
||||
UUIDGenerator uui_gen_;
|
||||
|
||||
uint32_t warp_id_;
|
||||
const Arch& arch_;
|
||||
Core *core_;
|
||||
uint64_t issued_instrs_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue