simx refactoring: simobject::push(), instr_trace, FUType, pending_instrs_

This commit is contained in:
Blaise Tine 2024-03-11 15:39:49 -07:00
parent 3ec37c6c40
commit ff6f33acff
20 changed files with 317 additions and 324 deletions

View file

@ -57,8 +57,6 @@ public:
, tx_cb_(nullptr)
{}
void send(const Pkt& pkt, uint64_t delay = 1) const;
void bind(SimPort<Pkt>* peer) {
assert(peer_ == nullptr);
peer_ = peer;
@ -88,13 +86,7 @@ public:
return queue_.front().pkt;
}
const Pkt& back() const {
return queue_.back();
}
Pkt& back() {
return queue_.back().pkt;
}
void push(const Pkt& pkt, uint64_t delay = 1) const;
uint64_t pop() {
auto cycles = queue_.front().cycles;
@ -122,12 +114,12 @@ protected:
SimPort* peer_;
TxCallback tx_cb_;
void push(const Pkt& data, uint64_t cycles) {
void transfer(const Pkt& data, uint64_t cycles) {
if (tx_cb_) {
tx_cb_(data, cycles);
}
if (peer_) {
peer_->push(data, cycles);
peer_->transfer(data, cycles);
} else {
queue_.push({data, cycles});
}
@ -199,7 +191,7 @@ template <typename Pkt>
class SimPortEvent : public SimEventBase {
public:
void fire() const override {
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, cycles_);
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
}
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
@ -409,9 +401,9 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
}
template <typename Pkt>
void SimPort<Pkt>::send(const Pkt& pkt, uint64_t delay) const {
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
if (peer_ && !tx_cb_) {
reinterpret_cast<const SimPort<Pkt>*>(peer_)->send(pkt, delay);
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
} else {
SimPlatform::instance().schedule(this, pkt, delay);
}

View file

@ -492,7 +492,7 @@ private:
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
@ -502,13 +502,13 @@ private:
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
bypass_switch_->ReqIn.at(1).send(mem_req, 1);
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
}
if (core_req.write && config_.write_reponse) {
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
}
@ -538,7 +538,7 @@ private:
if (!info.valid)
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
}
@ -582,7 +582,7 @@ private:
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
} else {
// mark line as dirty
@ -595,7 +595,7 @@ private:
if (!info.valid)
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
}
@ -614,7 +614,7 @@ private:
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_line.tag);
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
++perf_stats_.evictions;
}
@ -628,7 +628,7 @@ private:
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
}
// send core response
@ -637,7 +637,7 @@ private:
if (!info.valid)
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
}
@ -656,7 +656,7 @@ private:
mem_req.tag = mshr_id;
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
++pending_fill_reqs_;
}

View file

@ -49,11 +49,9 @@ Core::Core(const SimContext& ctx,
, ibuffers_(arch.num_warps(), IBUF_SIZE)
, scoreboard_(arch_)
, operands_(ISSUE_WIDTH)
, dispatchers_((uint32_t)ExeType::ExeTypeCount)
, exe_units_((uint32_t)ExeType::ExeTypeCount)
, dispatchers_((uint32_t)FUType::Count)
, exe_units_((uint32_t)FUType::Count)
, lmem_demuxs_(NUM_LSU_LANES)
, fetch_latch_("fetch")
, decode_latch_("decode")
, pending_icache_(arch_.num_warps())
, csrs_(arch.num_warps())
, commit_arbs_(ISSUE_WIDTH)
@ -95,22 +93,22 @@ Core::Core(const SimContext& ctx,
}
// initialize dispatchers
dispatchers_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_ALU_BLOCKS, NUM_ALU_LANES);
dispatchers_.at((int)ExeType::FPU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES);
dispatchers_.at((int)ExeType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_LSU_LANES);
dispatchers_.at((int)ExeType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_SFU_LANES);
dispatchers_.at((int)FUType::ALU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_ALU_BLOCKS, NUM_ALU_LANES);
dispatchers_.at((int)FUType::FPU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES);
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_LSU_LANES);
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, 1, NUM_SFU_LANES);
// initialize execute units
exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().create_object<LsuUnit>(this);
exe_units_.at((int)ExeType::SFU) = SimPlatform::instance().create_object<SfuUnit>(this);
exe_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
exe_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
exe_units_.at((int)FUType::LSU) = SimPlatform::instance().create_object<LsuUnit>(this);
exe_units_.at((int)FUType::SFU) = SimPlatform::instance().create_object<SfuUnit>(this);
// bind commit arbiters
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)ExeType::ExeTypeCount, 1);
for (uint32_t j = 0; j < (uint32_t)ExeType::ExeTypeCount; ++j) {
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
exe_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
}
commit_arbs_.at(i) = arbiter;
@ -157,8 +155,7 @@ void Core::reset() {
decode_latch_.clear();
pending_icache_.clear();
stalled_warps_.reset();
issued_instrs_ = 0;
committed_instrs_ = 0;
pending_instrs_ = 0;
exited_ = false;
perf_stats_ = PerfStats();
pending_ifetches_ = 0;
@ -204,7 +201,7 @@ void Core::schedule() {
// advance to fetch stage
fetch_latch_.push(trace);
++issued_instrs_;
++pending_instrs_;
}
void Core::fetch() {
@ -232,7 +229,7 @@ void Core::fetch() {
mem_req.tag = pending_icache_.allocate(trace);
mem_req.cid = trace->cid;
mem_req.uuid = trace->uuid;
icache_req_ports.at(0).send(mem_req, 2);
icache_req_ports.at(0).push(mem_req, 2);
DT(3, "icache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace);
fetch_latch_.pop();
++perf_stats_.ifetches;
@ -278,7 +275,7 @@ void Core::issue() {
if (operand->Output.empty())
continue;
auto trace = operand->Output.front();
if (dispatchers_.at((int)trace->exe_type)->push(i, trace)) {
if (dispatchers_.at((int)trace->fu_type)->push(i, trace)) {
operand->Output.pop();
trace->log_once(false);
} else {
@ -312,11 +309,11 @@ void Core::issue() {
}
for (uint32_t j = 0, n = uses.size(); j < n; ++j) {
auto& use = uses.at(j);
switch (use.exe_type) {
case ExeType::ALU: ++perf_stats_.scrb_alu; break;
case ExeType::FPU: ++perf_stats_.scrb_fpu; break;
case ExeType::LSU: ++perf_stats_.scrb_lsu; break;
case ExeType::SFU: {
switch (use.fu_type) {
case FUType::ALU: ++perf_stats_.scrb_alu; break;
case FUType::FPU: ++perf_stats_.scrb_fpu; break;
case FUType::LSU: ++perf_stats_.scrb_lsu; break;
case FUType::SFU: {
++perf_stats_.scrb_sfu;
switch (use.sfu_type) {
case SfuType::TMC:
@ -348,7 +345,7 @@ void Core::issue() {
DT(3, "pipeline-scoreboard: " << *trace);
// to operand stage
operands_.at(i)->Input.send(trace, 1);
operands_.at(i)->Input.push(trace, 1);
ibuffer.pop();
}
@ -356,14 +353,14 @@ void Core::issue() {
}
void Core::execute() {
for (uint32_t i = 0; i < (uint32_t)ExeType::ExeTypeCount; ++i) {
for (uint32_t i = 0; i < (uint32_t)FUType::Count; ++i) {
auto& dispatch = dispatchers_.at(i);
auto& exe_unit = exe_units_.at(i);
for (uint32_t j = 0; j < ISSUE_WIDTH; ++j) {
if (dispatch->Outputs.at(j).empty())
continue;
auto trace = dispatch->Outputs.at(j).front();
exe_unit->Inputs.at(j).send(trace, 1);
exe_unit->Inputs.at(j).push(trace, 1);
dispatch->Outputs.at(j).pop();
}
}
@ -387,8 +384,7 @@ void Core::commit() {
scoreboard_.release(trace);
}
assert(committed_instrs_ <= issued_instrs_);
++committed_instrs_;
--pending_instrs_;
perf_stats_.instrs += trace->tmask.count();
}
@ -743,7 +739,7 @@ bool Core::check_exit(Word* exitcode, bool riscv_test) const {
}
bool Core::running() const {
return (committed_instrs_ != issued_instrs_);
return (pending_instrs_ != 0);
}
void Core::resume() {

View file

@ -42,7 +42,7 @@ namespace vortex {
class Socket;
using TraceSwitch = Mux<pipeline_trace_t*>;
using TraceSwitch = Mux<instr_trace_t*>;
class Core : public SimObject<Core> {
public:
@ -187,11 +187,10 @@ private:
PipelineLatch fetch_latch_;
PipelineLatch decode_latch_;
HashTable<pipeline_trace_t*> pending_icache_;
HashTable<instr_trace_t*> pending_icache_;
WarpMask active_warps_;
WarpMask stalled_warps_;
uint64_t issued_instrs_;
uint64_t committed_instrs_;
uint64_t pending_instrs_;
bool exited_;
uint64_t pending_ifetches_;

View file

@ -13,21 +13,21 @@
#pragma once
#include "pipeline.h"
#include "instr_trace.h"
#include <queue>
namespace vortex {
class Dispatcher : public SimObject<Dispatcher> {
public:
std::vector<SimPort<pipeline_trace_t*>> Outputs;
std::vector<SimPort<instr_trace_t*>> Outputs;
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
: SimObject<Dispatcher>(ctx, "Dispatcher")
, Outputs(ISSUE_WIDTH, this)
, Inputs_(ISSUE_WIDTH, this)
, arch_(arch)
, queues_(ISSUE_WIDTH, std::queue<pipeline_trace_t*>())
, queues_(ISSUE_WIDTH, std::queue<instr_trace_t*>())
, buf_size_(buf_size)
, block_size_(block_size)
, num_lanes_(num_lanes)
@ -52,7 +52,7 @@ public:
if (queue.empty())
continue;
auto trace = queue.front();
Inputs_.at(i).send(trace, 1);
Inputs_.at(i).push(trace, 1);
queue.pop();
}
@ -84,7 +84,7 @@ public:
start /= num_lanes_;
end /= num_lanes_;
if (start != end) {
new_trace = new pipeline_trace_t(*trace);
new_trace = new instr_trace_t(*trace);
new_trace->eop = false;
start_p_.at(b) = start + 1;
} else {
@ -105,7 +105,7 @@ public:
++block_sent;
}
DT(3, "pipeline-dispatch: " << *new_trace);
output.send(new_trace, 1);
output.push(new_trace, 1);
}
if (block_sent == block_size_) {
batch_idx_ = (batch_idx_ + 1) % batch_count_;
@ -115,7 +115,7 @@ public:
}
};
bool push(uint32_t issue_index, pipeline_trace_t* trace) {
bool push(uint32_t issue_index, instr_trace_t* trace) {
auto& queue = queues_.at(issue_index);
if (queue.size() >= buf_size_)
return false;
@ -124,9 +124,9 @@ public:
}
private:
std::vector<SimPort<pipeline_trace_t*>> Inputs_;
std::vector<SimPort<instr_trace_t*>> Inputs_;
const Arch& arch_;
std::vector<std::queue<pipeline_trace_t*>> queues_;
std::vector<std::queue<instr_trace_t*>> queues_;
uint32_t buf_size_;
uint32_t block_size_;
uint32_t num_lanes_;

View file

@ -38,10 +38,10 @@ void AluUnit::tick() {
case AluType::BRANCH:
case AluType::SYSCALL:
case AluType::IMUL:
output.send(trace, LATENCY_IMUL+1);
output.push(trace, LATENCY_IMUL+1);
break;
case AluType::IDIV:
output.send(trace, XLEN+1);
output.push(trace, XLEN+1);
break;
default:
std::abort();
@ -68,19 +68,19 @@ void FpuUnit::tick() {
auto trace = input.front();
switch (trace->fpu_type) {
case FpuType::FNCP:
output.send(trace, 2);
output.push(trace, 2);
break;
case FpuType::FMA:
output.send(trace, LATENCY_FMA+1);
output.push(trace, LATENCY_FMA+1);
break;
case FpuType::FDIV:
output.send(trace, LATENCY_FDIV+1);
output.push(trace, LATENCY_FDIV+1);
break;
case FpuType::FSQRT:
output.send(trace, LATENCY_FSQRT+1);
output.push(trace, LATENCY_FSQRT+1);
break;
case FpuType::FCVT:
output.send(trace, LATENCY_FCVT+1);
output.push(trace, LATENCY_FCVT+1);
break;
default:
std::abort();
@ -125,7 +125,7 @@ void LsuUnit::tick() {
if (0 == entry.count) {
int iw = trace->wid % ISSUE_WIDTH;
auto& output = Outputs.at(iw);
output.send(trace, 1);
output.push(trace, 1);
pending_rd_reqs_.release(mem_rsp.tag);
}
dcache_rsp_port.pop();
@ -146,7 +146,7 @@ void LsuUnit::tick() {
if (0 == entry.count) {
int iw = trace->wid % ISSUE_WIDTH;
auto& output = Outputs.at(iw);
output.send(trace, 1);
output.push(trace, 1);
pending_rd_reqs_.release(mem_rsp.tag);
}
lmem_rsp_port.pop();
@ -159,7 +159,7 @@ void LsuUnit::tick() {
return;
int iw = fence_state_->wid % ISSUE_WIDTH;
auto& output = Outputs.at(iw);
output.send(fence_state_, 1);
output.push(fence_state_, 1);
fence_lock_ = false;
DT(3, "fence-unlock: " << fence_state_);
}
@ -240,7 +240,7 @@ void LsuUnit::tick() {
mem_req.cid = trace->cid;
mem_req.uuid = trace->uuid;
dcache_req_port.send(mem_req, 1);
dcache_req_port.push(mem_req, 1);
DT(3, "dcache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
<< ", lsu_type=" << trace->lsu_type << ", tid=" << t << ", addr_type=" << mem_req.type << ", " << *trace);
@ -257,7 +257,7 @@ void LsuUnit::tick() {
// do not wait on writes
if (is_write) {
pending_rd_reqs_.release(tag);
output.send(trace, 1);
output.push(trace, 1);
}
// remove input
@ -296,10 +296,10 @@ void SfuUnit::tick() {
case SfuType::CSRRW:
case SfuType::CSRRS:
case SfuType::CSRRC:
output.send(trace, 1);
output.push(trace, 1);
break;
case SfuType::BAR: {
output.send(trace, 1);
output.push(trace, 1);
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
if (trace->eop) {
core_->barrier(trace_data->bar.id, trace_data->bar.count, trace->wid);
@ -307,7 +307,7 @@ void SfuUnit::tick() {
release_warp = false;
} break;
case SfuType::CMOV:
output.send(trace, 3);
output.push(trace, 3);
break;
default:
std::abort();

View file

@ -14,8 +14,7 @@
#pragma once
#include <simobject.h>
#include "pipeline.h"
#include "cache_sim.h"
#include "instr_trace.h"
namespace vortex {
@ -23,8 +22,8 @@ class Core;
class ExeUnit : public SimObject<ExeUnit> {
public:
std::vector<SimPort<pipeline_trace_t*>> Inputs;
std::vector<SimPort<pipeline_trace_t*>> Outputs;
std::vector<SimPort<instr_trace_t*>> Inputs;
std::vector<SimPort<instr_trace_t*>> Outputs;
ExeUnit(const SimContext& ctx, Core* core, const char* name)
: SimObject<ExeUnit>(ctx, name)
@ -73,12 +72,12 @@ public:
private:
struct pending_req_t {
pipeline_trace_t* trace;
instr_trace_t* trace;
uint32_t count;
};
HashTable<pending_req_t> pending_rd_reqs_;
uint32_t num_lanes_;
pipeline_trace_t* fence_state_;
instr_trace_t* fence_state_;
uint64_t pending_loads_;
bool fence_lock_;
uint32_t input_idx_;

View file

@ -66,7 +66,7 @@ inline int64_t check_boxing(int64_t a) {
return nan_box(0x7fc00000); // NaN
}
void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
void Warp::execute(const Instr &instr, instr_trace_t *trace) {
assert(tmask_.any());
auto next_pc = PC_ + 4;
@ -136,7 +136,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
switch (opcode) {
case Opcode::LUI: {
// RV32I: LUI
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
for (uint32_t t = thread_start; t < num_threads; ++t) {
if (!tmask_.test(t))
@ -148,7 +148,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::AUIPC: {
// RV32I: AUIPC
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
for (uint32_t t = thread_start; t < num_threads; ++t) {
if (!tmask_.test(t))
@ -159,7 +159,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::R: {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -320,7 +320,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::I: {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->used_iregs.set(rsrc0);
for (uint32_t t = thread_start; t < num_threads; ++t) {
@ -380,7 +380,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::R_W: {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -507,7 +507,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::I_W: {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->used_iregs.set(rsrc0);
for (uint32_t t = thread_start; t < num_threads; ++t) {
@ -550,7 +550,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::B: {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::BRANCH;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -610,7 +610,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::JAL: {
// RV32I: JAL
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::BRANCH;
for (uint32_t t = thread_start; t < num_threads; ++t) {
if (!tmask_.test(t))
@ -624,7 +624,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::JALR: {
// RV32I: JALR
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::BRANCH;
trace->used_iregs.set(rsrc0);
for (uint32_t t = thread_start; t < num_threads; ++t) {
@ -639,7 +639,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::L:
case Opcode::FL: {
trace->exe_type = ExeType::LSU;
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::LOAD;
trace->used_iregs.set(rsrc0);
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
@ -683,7 +683,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::S:
case Opcode::FS: {
trace->exe_type = ExeType::LSU;
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::STORE;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -710,7 +710,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
break;
}
case Opcode::AMO: {
trace->exe_type = ExeType::LSU;
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::LOAD;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -790,7 +790,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
uint32_t csr_addr = immsrc;
uint32_t csr_value;
if (func3 == 0) {
trace->exe_type = ExeType::ALU;
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::SYSCALL;
trace->fetch_stall = true;
switch (csr_addr) {
@ -810,7 +810,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
std::abort();
}
} else {
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->fetch_stall = true;
csr_value = core_->get_csr(csr_addr, t, warp_id_);
switch (func3) {
@ -882,12 +882,12 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case Opcode::FENCE: {
// RV32I: FENCE
trace->exe_type = ExeType::LSU;
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::FENCE;
break;
}
case Opcode::FCI: {
trace->exe_type = ExeType::FPU;
trace->fu_type = FUType::FPU;
for (uint32_t t = thread_start; t < num_threads; ++t) {
if (!tmask_.test(t))
continue;
@ -1271,7 +1271,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
switch (func3) {
case 0: {
// TMC
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::TMC;
trace->used_iregs.set(rsrc0);
trace->fetch_stall = true;
@ -1282,7 +1282,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} break;
case 1: {
// WSPAWN
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::WSPAWN;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -1291,7 +1291,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} break;
case 2: {
// SPLIT
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::SPLIT;
trace->used_iregs.set(rsrc0);
trace->fetch_stall = true;
@ -1324,7 +1324,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} break;
case 3: {
// JOIN
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::JOIN;
trace->used_iregs.set(rsrc0);
trace->fetch_stall = true;
@ -1344,7 +1344,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} break;
case 4: {
// BAR
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::BAR;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -1353,7 +1353,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} break;
case 5: {
// PRED
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::PRED;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);
@ -1381,7 +1381,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
case 1:
switch (func2) {
case 0: { // CMOV
trace->exe_type = ExeType::SFU;
trace->fu_type = FUType::SFU;
trace->sfu_type = SfuType::CMOV;
trace->used_iregs.set(rsrc0);
trace->used_iregs.set(rsrc1);

View file

@ -13,7 +13,7 @@
#pragma once
#include "pipeline.h"
#include "instr_trace.h"
#include <queue>
namespace vortex {
@ -32,11 +32,11 @@ public:
return (entries_.size() == capacity_);
}
pipeline_trace_t* top() const {
instr_trace_t* top() const {
return entries_.front();
}
void push(pipeline_trace_t* trace) {
void push(instr_trace_t* trace) {
entries_.emplace(trace);
}
@ -45,12 +45,12 @@ public:
}
void clear() {
std::queue<pipeline_trace_t*> empty;
std::queue<instr_trace_t*> empty;
std::swap(entries_, empty );
}
private:
std::queue<pipeline_trace_t*> entries_;
std::queue<instr_trace_t*> entries_;
uint32_t capacity_;
};

View file

@ -17,8 +17,6 @@
namespace vortex {
class Warp;
enum class Opcode {
NONE = 0,
R = 0x33,

170
sim/simx/instr_trace.h Normal file
View file

@ -0,0 +1,170 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <iostream>
#include <util.h>
#include "types.h"
#include "arch.h"
#include "debug.h"
namespace vortex {
// Polymorphic base for the optional payload attached to an instruction trace
// (see instr_trace_t::data). The virtual destructor makes the type
// polymorphic, so a shared pointer to the base can be down-cast to a concrete
// payload type with std::dynamic_pointer_cast.
class ITraceData {
public:
  // Shared-ownership handle used to store a payload in a trace record.
  using Ptr = std::shared_ptr<ITraceData>;

  ITraceData() = default;
  virtual ~ITraceData() = default;
};
// Trace payload created for load/store instructions: a vector of
// mem_addr_size_t entries whose length is fixed at construction.
struct LsuTraceData : public ITraceData {
  using Ptr = std::shared_ptr<LsuTraceData>;

  // One entry per thread slot requested at construction (value-initialized).
  std::vector<mem_addr_size_t> mem_addrs;

  // thread_count: number of entries to allocate in mem_addrs.
  LsuTraceData(uint32_t thread_count)
    : mem_addrs(thread_count)
  {}
};
// Trace payload carrying the barrier arguments of an SFU instruction.
// The SFU unit reads bar.id and bar.count when it handles SfuType::BAR.
struct SFUTraceData : public ITraceData {
  using Ptr = std::shared_ptr<SFUTraceData>;

  // Barrier descriptor: identifier and count, stored exactly as supplied.
  struct {
    uint32_t id;
    uint32_t count;
  } bar;

  // bar_id / bar_count: values copied verbatim into bar.id / bar.count.
  SFUTraceData(uint32_t bar_id, uint32_t bar_count)
    : bar{bar_id, bar_count}
  {}
};
// Record describing one instruction as it moves between simulator stages.
// Instances are passed around by raw pointer (instr_trace_t*) through ports,
// latches and queues.
struct instr_trace_t {
  // -- identity, fixed at construction
  const uint64_t uuid;
  const Arch&    arch;

  // -- origin of the instruction: cid / wid, thread mask and program counter
  uint32_t   cid;
  uint32_t   wid;
  ThreadMask tmask;
  Word       PC;

  // -- destination register; the stream printer only shows rd when wb is set
  uint32_t rdest;
  RegType  rdest_type;
  bool     wb;

  // -- source registers read by the instruction, one mask per register file
  RegMask used_iregs;
  RegMask used_fregs;
  RegMask used_vregs;

  // -- functional unit the instruction is dispatched to
  FUType fu_type;

  // -- unit-specific operation selector; all members share the same storage,
  //    unit_type is the raw view used by the constructors below
  union {
    uint32_t unit_type;
    LsuType  lsu_type;
    AluType  alu_type;
    FpuType  fpu_type;
    SfuType  sfu_type;
  };

  // -- optional payload (nullptr when the instruction carries none)
  ITraceData::Ptr data;

  // -- packet bookkeeping: pid stays -1 until assigned; sop/eop default to true
  int  pid;
  bool sop;
  bool eop;

  bool fetch_stall;

  // Builds an empty trace bound to the given uuid and architecture; every
  // other field starts from its neutral value (FUType::ALU, unit_type 0,
  // no payload, pid -1, sop/eop true).
  instr_trace_t(uint64_t trace_uuid, const Arch& arch_ref)
    : uuid(trace_uuid)
    , arch(arch_ref)
    , cid(0)
    , wid(0)
    , tmask(0)
    , PC(0)
    , rdest(0)
    , rdest_type(RegType::None)
    , wb(false)
    , used_iregs(0)
    , used_fregs(0)
    , used_vregs(0)
    , fu_type(FUType::ALU)
    , unit_type(0)
    , data(nullptr)
    , pid(-1)
    , sop(true)
    , eop(true)
    , fetch_stall(false)
    , log_once_(false)
  {}

  // Copies every public field of `other` (the payload pointer is shared, not
  // cloned). The private log_once_ flag is deliberately NOT copied: the new
  // trace starts with it cleared.
  instr_trace_t(const instr_trace_t& other)
    : uuid(other.uuid)
    , arch(other.arch)
    , cid(other.cid)
    , wid(other.wid)
    , tmask(other.tmask)
    , PC(other.PC)
    , rdest(other.rdest)
    , rdest_type(other.rdest_type)
    , wb(other.wb)
    , used_iregs(other.used_iregs)
    , used_fregs(other.used_fregs)
    , used_vregs(other.used_vregs)
    , fu_type(other.fu_type)
    , unit_type(other.unit_type)
    , data(other.data)
    , pid(other.pid)
    , sop(other.sop)
    , eop(other.eop)
    , fetch_stall(other.fetch_stall)
    , log_once_(false)
  {}

  ~instr_trace_t() = default;

  // Stores `enable` in the log-once flag and returns the value the flag held
  // before the call.
  bool log_once(bool enable) {
    const bool previous = log_once_;
    log_once_ = enable;
    return previous;
  }

private:
  bool log_once_;
};
// Writes a one-line, human-readable summary of a trace to `os`:
//   cid, wid, per-thread mask bits, PC (hex), wb, rd (only when wb is set),
//   functional unit, packet info (only when pid != -1) and the uuid.
// Returns `os` to allow chaining.
//
// Fix: std::hex is a sticky stream flag. Previously the stream was switched
// back to decimal only inside the `wb` branch, so for traces with wb == false
// the pid field was emitted in hexadecimal. The base is now restored right
// after the PC so every later numeric field is decimal regardless of `wb`.
inline std::ostream &operator<<(std::ostream &os, const instr_trace_t& trace) {
  os << "cid=" << trace.cid;
  os << ", wid=" << trace.wid;
  os << ", tmask=";
  // one character per thread, in thread-index order
  for (uint32_t i = 0, n = trace.arch.num_threads(); i < n; ++i) {
    os << trace.tmask.test(i);
  }
  // only the PC is printed in hex; go back to decimal immediately
  os << ", PC=0x" << std::hex << trace.PC << std::dec;
  os << ", wb=" << trace.wb;
  if (trace.wb) {
    os << ", rd=" << trace.rdest_type << trace.rdest;
  }
  os << ", ex=" << trace.fu_type;
  if (trace.pid != -1) {
    os << ", pid=" << trace.pid;
    os << ", sop=" << trace.sop;
    os << ", eop=" << trace.eop;
  }
  os << " (#" << trace.uuid << ")";
  return os;
}
}

View file

@ -87,7 +87,7 @@ public:
if (!core_req.write || config_.write_reponse) {
// send response
MemRsp core_rsp{core_req.tag, core_req.cid};
simobject_->Outputs.at(req_id).send(core_rsp, 1);
simobject_->Outputs.at(req_id).push(core_rsp, 1);
}
// update perf counters

View file

@ -69,7 +69,7 @@ public:
if (req.type == ramulator::Request::Type::WRITE)
return;
MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid};
simobject_->MemRspPort.send(mem_rsp, 1);
simobject_->MemRspPort.push(mem_rsp, 1);
DT(3, simobject_->name() << "-" << mem_rsp);
}

View file

@ -13,15 +13,15 @@
#pragma once
#include "pipeline.h"
#include "instr_trace.h"
#include <queue>
namespace vortex {
class Operand : public SimObject<Operand> {
public:
SimPort<pipeline_trace_t*> Input;
SimPort<pipeline_trace_t*> Output;
SimPort<instr_trace_t*> Input;
SimPort<instr_trace_t*> Output;
Operand(const SimContext& ctx)
: SimObject<Operand>(ctx, "Operand")
@ -50,7 +50,7 @@ public:
}
}
Output.send(trace, delay);
Output.push(trace, delay);
DT(3, "pipeline-operands: " << *trace);

View file

@ -14,178 +14,24 @@
#pragma once
#include <memory>
#include <iostream>
#include <util.h>
#include "types.h"
#include "arch.h"
#include "debug.h"
#include "instr_trace.h"
namespace vortex {
class ITraceData {
public:
using Ptr = std::shared_ptr<ITraceData>;
ITraceData() {}
virtual ~ITraceData() {}
};
struct LsuTraceData : public ITraceData {
using Ptr = std::shared_ptr<LsuTraceData>;
std::vector<mem_addr_size_t> mem_addrs;
LsuTraceData(uint32_t num_threads) : mem_addrs(num_threads) {}
};
struct SFUTraceData : public ITraceData {
using Ptr = std::shared_ptr<SFUTraceData>;
struct {
uint32_t id;
uint32_t count;
} bar;
SFUTraceData(uint32_t bar_id, uint32_t bar_count) : bar{bar_id, bar_count} {}
};
struct pipeline_trace_t {
public:
//--
const uint64_t uuid;
const Arch& arch;
//--
uint32_t cid;
uint32_t wid;
ThreadMask tmask;
Word PC;
//--
uint32_t rdest;
RegType rdest_type;
bool wb;
//--
RegMask used_iregs;
RegMask used_fregs;
RegMask used_vregs;
//-
ExeType exe_type;
//--
union {
uint32_t unit_type;
LsuType lsu_type;
AluType alu_type;
FpuType fpu_type;
SfuType sfu_type;
};
ITraceData::Ptr data;
int pid;
bool sop;
bool eop;
bool fetch_stall;
pipeline_trace_t(uint64_t uuid, const Arch& arch)
: uuid(uuid)
, arch(arch)
, cid(0)
, wid(0)
, tmask(0)
, PC(0)
, rdest(0)
, rdest_type(RegType::None)
, wb(false)
, used_iregs(0)
, used_fregs(0)
, used_vregs(0)
, exe_type(ExeType::ALU)
, unit_type(0)
, data(nullptr)
, pid(-1)
, sop(true)
, eop(true)
, fetch_stall(false)
, log_once_(false)
{}
pipeline_trace_t(const pipeline_trace_t& rhs)
: uuid(rhs.uuid)
, arch(rhs.arch)
, cid(rhs.cid)
, wid(rhs.wid)
, tmask(rhs.tmask)
, PC(rhs.PC)
, rdest(rhs.rdest)
, rdest_type(rhs.rdest_type)
, wb(rhs.wb)
, used_iregs(rhs.used_iregs)
, used_fregs(rhs.used_fregs)
, used_vregs(rhs.used_vregs)
, exe_type(rhs.exe_type)
, unit_type(rhs.unit_type)
, data(rhs.data)
, pid(rhs.pid)
, sop(rhs.sop)
, eop(rhs.eop)
, fetch_stall(rhs.fetch_stall)
, log_once_(false)
{}
~pipeline_trace_t() {}
bool log_once(bool enable) {
bool old = log_once_;
log_once_ = enable;
return old;
}
private:
bool log_once_;
};
inline std::ostream &operator<<(std::ostream &os, const pipeline_trace_t& state) {
os << "cid=" << state.cid;
os << ", wid=" << state.wid;
os << ", tmask=";
for (uint32_t i = 0, n = state.arch.num_threads(); i < n; ++i) {
os << state.tmask.test(i);
}
os << ", PC=0x" << std::hex << state.PC;
os << ", wb=" << state.wb;
if (state.wb) {
os << ", rd=" << state.rdest_type << std::dec << state.rdest;
}
os << ", ex=" << state.exe_type;
if (state.pid != -1) {
os << ", pid=" << state.pid;
os << ", sop=" << state.sop;
os << ", eop=" << state.eop;
}
os << " (#" << std::dec << state.uuid << ")";
return os;
}
class PipelineLatch {
public:
PipelineLatch(const char* name = nullptr)
: name_(name)
{}
PipelineLatch() {}
~PipelineLatch() {}
bool empty() const {
return queue_.empty();
}
pipeline_trace_t* front() {
instr_trace_t* front() {
return queue_.front();
}
pipeline_trace_t* back() {
return queue_.back();
}
void push(pipeline_trace_t* value) {
void push(instr_trace_t* value) {
queue_.push(value);
}
@ -194,13 +40,12 @@ public:
}
void clear() {
std::queue<pipeline_trace_t*> empty;
std::swap(queue_, empty );
std::queue<instr_trace_t*> empty;
std::swap(queue_, empty);
}
protected:
const char* name_;
std::queue<pipeline_trace_t*> queue_;
std::queue<instr_trace_t*> queue_;
};
}

View file

@ -13,7 +13,7 @@
#pragma once
#include "pipeline.h"
#include "instr_trace.h"
#include <queue>
namespace vortex {
@ -24,7 +24,7 @@ public:
struct reg_use_t {
RegType reg_type;
uint32_t reg_id;
ExeType exe_type;
FUType fu_type;
SfuType sfu_type;
uint64_t uuid;
};
@ -44,12 +44,12 @@ public:
owners_.clear();
}
bool in_use(pipeline_trace_t* trace) const {
bool in_use(instr_trace_t* trace) const {
return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0
|| (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0;
}
std::vector<reg_use_t> get_uses(pipeline_trace_t* trace) const {
std::vector<reg_use_t> get_uses(instr_trace_t* trace) const {
std::vector<reg_use_t> out;
auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid);
@ -59,7 +59,7 @@ public:
if (used_iregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer;
auto owner = owners_.at(tag);
out.push_back({RegType::Integer, r, owner->exe_type, owner->sfu_type, owner->uuid});
out.push_back({RegType::Integer, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
@ -67,14 +67,14 @@ public:
if (used_fregs.test(r)) {
uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float;
auto owner = owners_.at(tag);
out.push_back({RegType::Float, r, owner->exe_type, owner->sfu_type, owner->uuid});
out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid});
}
}
return out;
}
void reserve(pipeline_trace_t* trace) {
void reserve(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
@ -89,10 +89,10 @@ public:
uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type;
assert(owners_.count(tag) == 0);
owners_[tag] = trace;
assert((int)trace->exe_type < 5);
assert((int)trace->fu_type < 5);
}
void release(pipeline_trace_t* trace) {
void release(instr_trace_t* trace) {
assert(trace->wb);
switch (trace->rdest_type) {
case RegType::Integer:
@ -112,7 +112,7 @@ private:
std::vector<RegMask> in_use_iregs_;
std::vector<RegMask> in_use_fregs_;
std::unordered_map<uint32_t, pipeline_trace_t*> owners_;
std::unordered_map<uint32_t, instr_trace_t*> owners_;
};
}

BIN
sim/simx/simx Executable file

Binary file not shown.

View file

@ -75,20 +75,20 @@ inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
///////////////////////////////////////////////////////////////////////////////
// Kinds of unit an instruction can be dispatched to: ALU, LSU, FPU, SFU.
// The trailing enumerator is a sentinel; with default numbering its value
// equals the number of real kinds listed before it.
// NOTE(review): both the enum header (ExeType / FUType) and the sentinel
// (ExeTypeCount / Count) appear twice. This looks like the before/after of a
// rename shown together; confirm which pair the real file keeps.
enum class ExeType {
enum class FUType {
ALU,
LSU,
FPU,
SFU,
ExeTypeCount
Count
};
inline std::ostream &operator<<(std::ostream &os, const ExeType& type) {
inline std::ostream &operator<<(std::ostream &os, const FUType& type) {
switch (type) {
case ExeType::ALU: os << "ALU"; break;
case ExeType::LSU: os << "LSU"; break;
case ExeType::FPU: os << "FPU"; break;
case ExeType::SFU: os << "SFU"; break;
case FUType::ALU: os << "ALU"; break;
case FUType::LSU: os << "LSU"; break;
case FUType::FPU: os << "FPU"; break;
case FUType::SFU: os << "SFU"; break;
default: assert(false);
}
return os;
@ -417,7 +417,7 @@ public:
if (!req_in.empty()) {
auto& req = req_in.front();
DT(4, this->name() << "-" << req);
Outputs.at(o).send(req, delay_);
Outputs.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
break;
@ -513,7 +513,7 @@ public:
req.tag = (req.tag << lg_num_reqs_) | i;
}
DT(4, this->name() << "-" << req);
ReqOut.at(o).send(req, delay_);
ReqOut.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
break;
@ -530,7 +530,7 @@ public:
}
DT(4, this->name() << "-" << rsp);
uint32_t j = o * R + i;
RspIn.at(j).send(rsp, 1);
RspIn.at(j).push(rsp, 1);
RspOut.at(o).pop();
}
}
@ -583,13 +583,13 @@ public:
if (!RspSM.empty()) {
auto& rsp = RspSM.front();
DT(4, this->name() << "-" << rsp);
RspIn.send(rsp, 1);
RspIn.push(rsp, 1);
RspSM.pop();
}
if (!RspDC.empty()) {
auto& rsp = RspDC.front();
DT(4, this->name() << "-" << rsp);
RspIn.send(rsp, 1);
RspIn.push(rsp, 1);
RspDC
.pop();
}
@ -598,9 +598,9 @@ public:
auto& req = ReqIn.front();
DT(4, this->name() << "-" << req);
if (req.type == AddrType::Shared) {
ReqSM.send(req, delay_);
ReqSM.push(req, delay_);
} else {
ReqDC.send(req, delay_);
ReqDC.push(req, delay_);
}
ReqIn.pop();
}

View file

@ -38,8 +38,7 @@ void Warp::reset() {
#if (XLEN == 64)
PC_ = (uint64_t(core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) | PC_;
#endif
tmask_.reset();
issued_instrs_ = 0;
tmask_.reset();
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) {
for (auto& reg : ireg_file_.at(i)) {
reg = 0;
@ -51,7 +50,7 @@ void Warp::reset() {
uui_gen_.reset();
}
pipeline_trace_t* Warp::eval() {
instr_trace_t* Warp::eval() {
assert(tmask_.any());
#ifndef NDEBUG
@ -83,7 +82,7 @@ pipeline_trace_t* Warp::eval() {
DP(1, "Instr 0x" << std::hex << instr_code << ": " << *instr);
// Create trace
auto trace = new pipeline_trace_t(uuid, arch_);
auto trace = new instr_trace_t(uuid, arch_);
trace->cid = core_->id();
trace->wid = warp_id_;
trace->PC = PC_;

View file

@ -23,7 +23,7 @@ namespace vortex {
class Arch;
class Core;
class Instr;
class pipeline_trace_t;
class instr_trace_t;
struct DomStackEntry {
DomStackEntry(const ThreadMask &tmask, Word PC)
@ -79,22 +79,17 @@ public:
return ireg_file_.at(0).at(reg);
}
// Advances issued_instrs_ by one and returns the value it held beforehand.
uint64_t incr_instrs() {
  const uint64_t before = issued_instrs_;
  issued_instrs_ += 1;
  return before;
}
pipeline_trace_t* eval();
instr_trace_t* eval();
private:
void execute(const Instr &instr, pipeline_trace_t *trace);
void execute(const Instr &instr, instr_trace_t *trace);
UUIDGenerator uui_gen_;
uint32_t warp_id_;
const Arch& arch_;
Core *core_;
uint64_t issued_instrs_;
Word PC_;
ThreadMask tmask_;