minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2024-12-04 06:00:19 -08:00
parent 30b0daf050
commit 3ace9bbeda
18 changed files with 476 additions and 178 deletions

View file

@ -105,7 +105,7 @@ regression()
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
# test for matmul
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
echo "regression tests done!"
}
@ -322,6 +322,10 @@ config2()
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
# test memory ports
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo --threads=32
echo "configuration-2 tests done!"
}

View file

@ -648,9 +648,9 @@
// Number of Memory Ports
`ifndef L1_MEM_PORTS
`ifdef L1_DISABLE
`define L1_MEM_PORTS `L2_MEM_PORTS
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`else
`define L1_MEM_PORTS `MIN(`L2_MEM_PORTS, `DCACHE_NUM_BANKS)
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`endif
`endif
@ -727,9 +727,9 @@
// Number of Memory Ports
`ifndef L2_MEM_PORTS
`ifdef L2_ENABLE
`define L2_MEM_PORTS `MIN(`L3_MEM_PORTS, `L2_NUM_BANKS)
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L2_MEM_PORTS `L3_MEM_PORTS
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif
@ -788,9 +788,9 @@
// Number of Memory Ports
`ifndef L3_MEM_PORTS
`ifdef L3_ENABLE
`define L3_MEM_PORTS `MIN(`PLATFORM_MEMORY_BANKS, `L3_NUM_BANKS)
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L3_MEM_PORTS `PLATFORM_MEMORY_BANKS
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -27,9 +27,9 @@ class SimObjectBase;
///////////////////////////////////////////////////////////////////////////////
class SimPortBase {
public:
public:
virtual ~SimPortBase() {}
SimObjectBase* module() const {
return module_;
}
@ -92,7 +92,7 @@ public:
auto cycles = queue_.front().cycles;
queue_.pop();
return cycles;
}
}
void tx_callback(const TxCallback& callback) {
tx_cb_ = callback;
@ -137,7 +137,7 @@ public:
typedef std::shared_ptr<SimEventBase> Ptr;
virtual ~SimEventBase() {}
virtual void fire() const = 0;
uint64_t cycles() const {
@ -161,7 +161,7 @@ public:
typedef std::function<void (const Pkt&)> Func;
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
, func_(func)
, pkt_(pkt)
@ -194,8 +194,8 @@ public:
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
}
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
, port_(port)
, pkt_(pkt)
{}
@ -209,7 +209,7 @@ public:
}
protected:
const SimPort<Pkt>* port_;
const SimPort<Pkt>* port_;
Pkt pkt_;
static MemoryPool<SimPortEvent<Pkt>> allocator_;
@ -230,11 +230,11 @@ public:
const std::string& name() const {
return name_;
}
}
protected:
SimObjectBase(const SimContext& ctx, const char* name);
SimObjectBase(const SimContext& ctx, const std::string& name);
private:
@ -259,8 +259,8 @@ public:
protected:
SimObject(const SimContext& ctx, const char* name)
: SimObjectBase(ctx, name)
SimObject(const SimContext& ctx, const std::string& name)
: SimObjectBase(ctx, name)
{}
private:
@ -283,9 +283,9 @@ private:
};
class SimContext {
private:
private:
SimContext() {}
friend class SimPlatform;
};
@ -320,10 +320,10 @@ public:
template <typename Pkt>
void schedule(const typename SimCallEvent<Pkt>::Func& callback,
const Pkt& pkt,
uint64_t delay) {
const Pkt& pkt,
uint64_t delay) {
assert(delay != 0);
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
events_.emplace_back(evt);
}
@ -341,10 +341,10 @@ public:
auto evt_it_end = events_.end();
while (evt_it != evt_it_end) {
auto& event = *evt_it;
if (cycles_ >= event->cycles()) {
if (cycles_ >= event->cycles()) {
event->fire();
evt_it = events_.erase(evt_it);
} else {
} else {
++evt_it;
}
}
@ -352,7 +352,7 @@ public:
for (auto& object : objects_) {
object->do_tick();
}
// advance clock
// advance clock
++cycles_;
}
@ -390,8 +390,8 @@ private:
///////////////////////////////////////////////////////////////////////////////
inline SimObjectBase::SimObjectBase(const SimContext&, const char* name)
: name_(name)
inline SimObjectBase::SimObjectBase(const SimContext&, const std::string& name)
: name_(name)
{}
template <typename Impl>
@ -403,8 +403,8 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
template <typename Pkt>
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
if (peer_ && !tx_cb_) {
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
} else {
SimPlatform::instance().schedule(this, pkt, delay);
}
}
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -47,7 +47,7 @@ public:
, indent_(indent, ' ')
, owner_(nullptr)
{}
explicit IndentStream(std::ostream& dest, int indent = 4)
: dest_(dest.rdbuf())
, isBeginLine_(true)
@ -76,3 +76,14 @@ private:
std::string indent_;
std::ostream* owner_;
};
/// Formats `fmt` printf-style and returns the result as a std::string.
///
/// @param fmt   printf-compatible format string.
/// @param args  Values consumed by the conversion specifiers in `fmt`. They are
///              handed to std::snprintf unchanged, so each one must be a type
///              that can travel through C varargs (pass `s.c_str()`, not a
///              std::string object).
/// @return The formatted text, without the terminating NUL.
/// @throws std::runtime_error if std::snprintf reports a formatting error.
template <typename... Args>
std::string StrFormat(const std::string& fmt, Args... args) {
  // Measuring pass: a null buffer of size 0 makes snprintf return only the
  // number of characters the output needs (negative on error). The check
  // happens before any arithmetic so a failure can never be masked.
  const int length = std::snprintf(nullptr, 0, fmt.c_str(), args...);
  if (length < 0) {
    throw std::runtime_error("Error during formatting.");
  }

  // Formatting pass: write directly into the string's own storage. snprintf
  // emits `length` characters followed by a NUL, which lands on the string's
  // terminator slot, so no scratch buffer or trailing trim is required.
  std::string result(static_cast<std::string::size_type>(length), '\0');
  const int written = std::snprintf(&result[0], result.size() + 1, fmt.c_str(), args...);
  if (written != length) {
    throw std::runtime_error("Error during formatting.");
  }
  return result;
}

View file

@ -430,7 +430,7 @@ public:
continue;
auto& mem_rsp = mem_rsp_port.front();
DT(3, simobject_->name() << "-bank" << bank_id << " fill-rsp: " << mem_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-fill-rsp: " << mem_rsp);
pipeline_req.type = bank_req_t::Fill;
pipeline_req.tag = mem_rsp.tag;
mem_rsp_port.pop();
@ -495,7 +495,7 @@ public:
bank_req.type = bank_req_t::Core;
bank_req.write = core_req.write;
pipeline_req = bank_req;
DT(3, simobject_->name() << " core-req: " << core_req);
DT(3, simobject_->name() << "-core-req: " << core_req);
}
if (core_req.write)
@ -523,7 +523,7 @@ private:
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
}
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
@ -532,13 +532,13 @@ private:
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
uint32_t mem_port = req_id % config_.mem_ports;
nc_arbs_.at(mem_port)->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
DT(3, simobject_->name() << "-bypass-dram-req: " << mem_req);
}
if (core_req.write && config_.write_reponse) {
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
}
}
@ -568,7 +568,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " replay: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-replay: " << core_rsp);
}
}
} break;
@ -612,7 +612,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
} else {
// mark line as dirty
hit_line.dirty = true;
@ -625,7 +625,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
}
}
} else {
@ -644,7 +644,7 @@ private:
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writeback: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writeback: " << mem_req);
++perf_stats_.evictions;
}
}
@ -658,7 +658,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
}
// send core response
if (config_.write_reponse) {
@ -667,7 +667,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
}
}
} else {
@ -676,7 +676,7 @@ private:
// allocate MSHR
auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? free_line_id : repl_line_id);
DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-mshr-enqueue: " << pipeline_req);
// send fill request
if (!mshr_pending) {
@ -687,7 +687,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " fill: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-fill: " << mem_req);
++pending_fill_reqs_;
}
}

View file

@ -20,7 +20,7 @@ Cluster::Cluster(const SimContext& ctx,
ProcessorImpl* processor,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "cluster")
: SimObject(ctx, StrFormat("cluster%d", cluster_id))
, mem_req_ports(L2_MEM_PORTS, this)
, mem_rsp_ports(L2_MEM_PORTS, this)
, cluster_id_(cluster_id)
@ -42,7 +42,7 @@ Cluster::Cluster(const SimContext& ctx,
// Create l2cache
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
snprintf(sname, 100, "%s-l2cache", this->name().c_str());
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
!L2_ENABLED,
log2ceil(L2_CACHE_SIZE),// C

View file

@ -34,8 +34,8 @@ inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
inline constexpr int L2_NUM_REQS = 2;
inline constexpr int L2_NUM_REQS = NUM_SOCKETS * L1_MEM_PORTS;
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS;
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS * L2_MEM_PORTS;
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;

View file

@ -30,7 +30,7 @@ Core::Core(const SimContext& ctx,
Socket* socket,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "core")
: SimObject(ctx, StrFormat("core%d", core_id))
, icache_req_ports(1, this)
, icache_rsp_ports(1, this)
, dcache_req_ports(DCACHE_NUM_REQS, this)
@ -59,12 +59,12 @@ Core::Core(const SimContext& ctx,
// create the memory coalescer
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-coalescer%d", core_id, i);
snprintf(sname, 100, "%s-coalescer%d", this->name().c_str(), i);
mem_coalescers_.at(i) = MemCoalescer::Create(sname, LSU_CHANNELS, DCACHE_CHANNELS, DCACHE_WORD_SIZE, LSUQ_OUT_SIZE, 1);
}
// create local memory
snprintf(sname, 100, "core%d-local_mem", core_id);
snprintf(sname, 100, "%s-local_mem", this->name().c_str());
local_mem_ = LocalMem::Create(sname, LocalMem::Config{
(1 << LMEM_LOG_SIZE),
LSU_WORD_SIZE,
@ -75,19 +75,19 @@ Core::Core(const SimContext& ctx,
// create lsu demux
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
snprintf(sname, 100, "%s-lsu_demux%d", this->name().c_str(), i);
lsu_demux_.at(i) = LocalMemSwitch::Create(sname, 1);
}
// create lsu dcache adapter
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i);
snprintf(sname, 100, "%s-lsu_dcache_adapter%d", this->name().c_str(), i);
lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1);
}
// create lsu lmem adapter
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i);
snprintf(sname, 100, "%s-lsu_lmem_adapter%d", this->name().c_str(), i);
lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1);
}
@ -140,7 +140,7 @@ Core::Core(const SimContext& ctx,
// bind commit arbiters
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
snprintf(sname, 100, "%s-commit-arb%d", this->name().c_str(), i);
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));

View file

@ -103,7 +103,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
auto reg = instr.getRSrc(i);
switch (type) {
case RegType::Integer:
DPH(2, "Src" << i << " Reg: " << type << reg << "={");
DPH(2, "Src" << i << "-Reg: " << type << reg << "={");
for (uint32_t t = 0; t < num_threads; ++t) {
if (t) DPN(2, ", ");
if (!warp.tmask.test(t)) {
@ -116,7 +116,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
DPN(2, "}" << std::endl);
break;
case RegType::Float:
DPH(2, "Src" << i << " Reg: " << type << reg << "={");
DPH(2, "Src" << i << "-Reg: " << type << reg << "={");
for (uint32_t t = 0; t < num_threads; ++t) {
if (t) DPN(2, ", ");
if (!warp.tmask.test(t)) {
@ -1421,7 +1421,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
std::abort();
}
} break;
case Opcode::TCU:
case Opcode::TCU:
{ //TODO - make it data-type flexible
uint32_t mem_bytes = 1;
DP(3, "mem_bytes=" << mem_bytes << std::endl);
@ -1443,7 +1443,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
//LOAD
if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp)
{
{
num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp;
num_data_per_thread = 1;
}
@ -1456,7 +1456,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
//STORE
if(num_threads > tc_size*tc_size*TC_per_warp)
{
{
num_threads_actv_st = tc_size*tc_size*TC_per_warp;
num_data_per_thread_st = 1;
}
@ -1466,30 +1466,30 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc;
}
data_bytes_store = mem_bytes*num_data_per_thread_st;
DP(3, "Num Tiles=" << n_tiles << std::endl);
switch (func3) {
case 0:
{ //Matrix Load
case 0:
{ //Matrix Load
DP (4, "TCU LOAD");
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::TCU_LOAD;
trace->src_regs[0] = {RegType::Integer, rsrc0};
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
trace->data = trace_data;
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
uint32_t base_addr = rsdata[t][0].i ;
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load};
//Load A or B (depends on immsrc)
int loop_offset = 0;
DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <<std::endl);
@ -1502,10 +1502,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]);
}
}
rd_write = true;
rd_write = true;
} break;
case 1:
{
case 1:
{
DP(4, "TCU STORE");
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::TCU_STORE;
@ -1513,12 +1513,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
trace->data = trace_data;
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
uint32_t base_addr = rsdata[t][0].i ;
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store};
@ -1529,7 +1529,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0));
*temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n];
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
}
}
//Clear the scratchpad
@ -1539,18 +1539,18 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
}
}
break;
case 2:
case 2:
{ //Matrix Multiply
DP(4, "TCU MULTIPLY MAT");
trace->fu_type = FUType::TCU;
trace->tcu_type = TCUType::TCU_MUL;
uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp);
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
//TC operation [only 1 thread in 1 warp needs to do this]
if (t%threads_per_tc == 0)
{
@ -1563,7 +1563,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size;
uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2;
for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation?
{
{
for (int i = 0; i < tc_size; i++) { //ROW-1
for (int j = 0; j < tc_size; j++) { //COL-2
int sum = 0;

View file

@ -121,7 +121,7 @@ void LsuUnit::tick() {
continue;
auto& state = states_.at(b);
auto& lsu_rsp = lsu_rsp_port.front();
DT(3, this->name() << " mem-rsp: " << lsu_rsp);
DT(3, this->name() << "-mem-rsp: " << lsu_rsp);
auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag);
auto trace = entry.trace;
assert(!entry.mask.none());
@ -146,7 +146,7 @@ void LsuUnit::tick() {
continue;
Outputs.at(iw).push(state.fence_trace, 1);
state.fence_lock = false;
DT(3, this->name() << " fence-unlock: " << state.fence_trace);
DT(3, this->name() << "-fence-unlock: " << state.fence_trace);
}
// check input queue
@ -160,7 +160,7 @@ void LsuUnit::tick() {
// schedule fence lock
state.fence_trace = trace;
state.fence_lock = true;
DT(3, this->name() << " fence-lock: " << *trace);
DT(3, this->name() << "-fence-lock: " << *trace);
// remove input
input.pop();
continue;
@ -171,7 +171,7 @@ void LsuUnit::tick() {
// check pending queue capacity
if (!is_write && state.pending_rd_reqs.full()) {
if (!trace->log_once(true)) {
DT(4, "*** " << this->name() << " queue-full: " << *trace);
DT(4, "*** " << this->name() << "-queue-full: " << *trace);
}
continue;
} else {
@ -202,7 +202,7 @@ void LsuUnit::tick() {
// send memory request
core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req);
DT(3, this->name() << " mem-req: " << lsu_req);
DT(3, this->name() << "-mem-req: " << lsu_req);
// update stats
auto num_addrs = lsu_req.mask.count();
@ -237,7 +237,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
{
req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4);
}
auto t0 = trace->pid * NUM_LSU_LANES;
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
@ -250,7 +250,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
auto mem_addr = trace_data->mem_addrs.at(t);
auto type = get_addr_type(mem_addr.addr);
// DT(3, "addr_type = " << type << ", " << *trace);
// DT(3, "addr_type = " << type << ", " << *trace);
uint32_t mem_bytes = 1;
for (int i = 0; i < req_per_thread; i++)
{
@ -261,7 +261,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
mem_req.tag = tag;
mem_req.cid = trace->cid;
mem_req.uuid = trace->uuid;
dcache_req_port.push(mem_req, 1);
DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
<< ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace);
@ -272,7 +272,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
++core_->perf_stats_.loads;
++pending_loads_;
}
++count;
}
}
@ -282,7 +282,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
///////////////////////////////////////////////////////////////////////////////
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
: FuncUnit(ctx, core, "TCU")
{}
@ -290,7 +290,7 @@ void TcuUnit::tick() {
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
auto& input = Inputs.at(i);
if (input.empty())
if (input.empty())
continue;
auto& output = Outputs.at(i);
auto trace = input.front();
@ -307,7 +307,7 @@ void TcuUnit::tick() {
}
default:
std::abort();
}
}
DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace);
input.pop();
}

View file

@ -24,8 +24,7 @@ protected:
LocalMem* simobject_;
Config config_;
RAM ram_;
int32_t bank_sel_addr_start_;
int32_t bank_sel_addr_end_;
MemCrossBar::Ptr mem_xbar_;
PerfStats perf_stats_;
uint64_t to_local_addr(uint64_t addr) {
@ -40,9 +39,15 @@ public:
: simobject_(simobject)
, config_(config)
, ram_(config.capacity)
, bank_sel_addr_start_(0)
, bank_sel_addr_end_(config.B-1)
{}
{
char sname[100];
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_reqs, (1 << config.B));
for (uint32_t i = 0; i < config.num_reqs; ++i) {
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
}
}
virtual ~Impl() {}
@ -82,7 +87,7 @@ public:
continue;
}
DT(4, simobject_->name() << " mem-req" << req_id << ": "<< core_req);
DT(4, simobject_->name() << "-mem-req" << req_id << ": "<< core_req);
in_used_banks.at(bank_id) = true;

View file

@ -42,10 +42,10 @@ void MemCoalescer::reset() {
}
void MemCoalescer::tick() {
// process incoming responses
// process outgoing responses
if (!RspOut.empty()) {
auto& out_rsp = RspOut.front();
DT(4, this->name() << " mem-rsp: " << out_rsp);
DT(4, this->name() << "-mem-rsp: " << out_rsp);
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
BitVector<> rsp_mask(input_size_);
@ -89,7 +89,7 @@ void MemCoalescer::tick() {
// ensure we can allocate a response tag
if (pending_rd_reqs_.full()) {
DT(4, "*** " << this->name() << " queue-full: " << in_req);
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
return;
}
@ -145,7 +145,7 @@ void MemCoalescer::tick() {
// send memory request
ReqOut.push(out_req, delay_);
DT(4, this->name() << " mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
DT(4, this->name() << "-mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
// update sent mask
sent_mask_ |= cur_mask;

View file

@ -27,13 +27,14 @@ class MemSim::Impl {
private:
MemSim* simobject_;
Config config_;
MemCrossBar::Ptr mem_xbar_;
DramSim dram_sim_;
PerfStats perf_stats_;
struct DramCallbackArgs {
MemSim* simobject;
MemReq request;
uint32_t i;
MemSim::Impl* memsim;
MemReq request;
uint32_t bank_id;
};
public:
@ -41,7 +42,15 @@ public:
: simobject_(simobject)
, config_(config)
, dram_sim_(MEM_CLOCK_RATIO)
{}
{
char sname[100];
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
for (uint32_t i = 0; i < config.num_ports; ++i) {
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
}
}
~Impl() {
//--
@ -59,14 +68,14 @@ public:
dram_sim_.tick();
uint32_t counter = 0;
for (uint32_t i = 0; i < config_.channels; ++i) {
if (simobject_->MemReqPorts.at(i).empty())
for (uint32_t i = 0; i < config_.num_banks; ++i) {
if (mem_xbar_->ReqOut.at(i).empty())
continue;
auto& mem_req = simobject_->MemReqPorts.at(i).front();
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
// try to enqueue the request to the memory system
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
auto req_args = new DramCallbackArgs{this, mem_req, i};
auto enqueue_success = dram_sim_.send_request(
mem_req.write,
mem_req.addr,
@ -76,8 +85,8 @@ public:
// only send a response for read requests
if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp);
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp: bank=" << rsp_args->bank_id << ", " << mem_rsp);
}
delete rsp_args;
},
@ -90,9 +99,9 @@ public:
continue;
}
DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req);
DT(3, simobject_->name() << "-mem-req: bank=" << i << ", " << mem_req);
simobject_->MemReqPorts.at(i).pop();
mem_xbar_->ReqOut.at(i).pop();
counter++;
}
@ -107,8 +116,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPorts(config.channels, this)
, MemRspPorts(config.channels, this)
, MemReqPorts(config.num_ports, this)
, MemRspPorts(config.num_ports, this)
, impl_(new Impl(this, config))
{}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,15 +21,15 @@ namespace vortex {
class MemSim : public SimObject<MemSim>{
public:
struct Config {
uint32_t channels;
uint32_t num_cores;
uint32_t num_banks;
uint32_t num_ports;
};
struct PerfStats {
uint64_t counter;
uint64_t ticks;
PerfStats()
PerfStats()
: counter(0)
, ticks(0)
{}
@ -52,7 +52,7 @@ public:
void tick();
const PerfStats& perf_stats() const;
private:
class Impl;
Impl* impl_;

View file

@ -25,7 +25,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
// create memory simulator
memsim_ = MemSim::Create("dram", MemSim::Config{
PLATFORM_MEMORY_BANKS,
uint32_t(arch.num_cores()) * arch.num_clusters()
L3_MEM_PORTS
});
// create clusters

View file

@ -21,7 +21,7 @@ Socket::Socket(const SimContext& ctx,
Cluster* cluster,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "socket")
: SimObject(ctx, StrFormat("socket%d", socket_id))
, mem_req_ports(L1_MEM_PORTS, this)
, mem_rsp_ports(L1_MEM_PORTS, this)
, socket_id_(socket_id)
@ -31,7 +31,7 @@ Socket::Socket(const SimContext& ctx,
auto cores_per_socket = cores_.size();
char sname[100];
snprintf(sname, 100, "socket%d-icaches", socket_id);
snprintf(sname, 100, "%s-icaches", this->name().c_str());
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, CacheSim::Config{
!ICACHE_ENABLED,
log2ceil(ICACHE_SIZE), // C
@ -49,7 +49,7 @@ Socket::Socket(const SimContext& ctx,
2, // pipeline latency
});
snprintf(sname, 100, "socket%d-dcaches", socket_id);
snprintf(sname, 100, "%s-dcaches", this->name().c_str());
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, CacheSim::Config{
!DCACHE_ENABLED,
log2ceil(DCACHE_SIZE), // C
@ -70,7 +70,7 @@ Socket::Socket(const SimContext& ctx,
// connect l1 caches to outgoing memory interfaces
for (uint32_t i = 0; i < L1_MEM_PORTS; ++i) {
if (i == 0) {
snprintf(sname, 100, "socket%d-l1_arb%d", socket_id, i);
snprintf(sname, 100, "%s-l1_arb%d", this->name().c_str(), i);
auto l1_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, 2, 1);
icaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(1));
@ -82,8 +82,8 @@ Socket::Socket(const SimContext& ctx,
l1_arb->ReqOut.at(0).bind(&this->mem_req_ports.at(0));
this->mem_rsp_ports.at(0).bind(&l1_arb->RspOut.at(0));
} else {
this->mem_req_ports.at(i).bind(&dcaches_->MemReqPorts.at(i));
dcaches_->MemRspPorts.at(i).bind(&this->mem_rsp_ports.at(i));
dcaches_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
this->mem_rsp_ports.at(i).bind(&dcaches_->MemRspPorts.at(i));
}
}

View file

@ -32,16 +32,16 @@ LocalMemSwitch::LocalMemSwitch(
void LocalMemSwitch::reset() {}
void LocalMemSwitch::tick() {
// process incoming responses
// process outgoing responses
if (!RspLmem.empty()) {
auto& out_rsp = RspLmem.front();
DT(4, this->name() << " lmem-rsp: " << out_rsp);
DT(4, this->name() << "-lmem-rsp: " << out_rsp);
RspIn.push(out_rsp, 1);
RspLmem.pop();
}
if (!RspDC.empty()) {
auto& out_rsp = RspDC.front();
DT(4, this->name() << " dc-rsp: " << out_rsp);
DT(4, this->name() << "-dc-rsp: " << out_rsp);
RspIn.push(out_rsp, 1);
RspDC.pop();
}
@ -73,12 +73,12 @@ void LocalMemSwitch::tick() {
if (!out_dc_req.mask.none()) {
ReqDC.push(out_dc_req, delay_);
DT(4, this->name() << " dc-req: " << out_dc_req);
DT(4, this->name() << "-dc-req: " << out_dc_req);
}
if (!out_lmem_req.mask.none()) {
ReqLmem.push(out_lmem_req, delay_);
DT(4, this->name() << " lmem-req: " << out_lmem_req);
DT(4, this->name() << "-lmem-req: " << out_lmem_req);
}
ReqIn.pop();
}
@ -104,12 +104,12 @@ void LsuMemAdapter::reset() {}
void LsuMemAdapter::tick() {
uint32_t input_size = ReqOut.size();
// process incoming responses
// process outgoing responses
for (uint32_t i = 0; i < input_size; ++i) {
if (RspOut.at(i).empty())
continue;
auto& out_rsp = RspOut.at(i).front();
DT(4, this->name() << " rsp" << i << ": " << out_rsp);
DT(4, this->name() << "-rsp" << i << ": " << out_rsp);
// build memory response
LsuRsp in_rsp(input_size);
@ -155,7 +155,7 @@ void LsuMemAdapter::tick() {
// send memory request
ReqOut.at(i).push(out_req, delay_);
DT(4, this->name() << " req" << i << ": " << out_req);
DT(4, this->name() << "-req" << i << ": " << out_req);
}
}
ReqIn.pop();

View file

@ -483,12 +483,12 @@ public:
, Outputs(num_outputs, this)
, type_(type)
, delay_(delay)
, cursors_(num_outputs, 0)
, grants_(num_outputs, 0)
, num_reqs_(log2ceil(num_inputs / num_outputs))
{
assert(delay != 0);
assert(num_inputs <= 32);
assert(num_outputs <= 32);
assert(num_inputs <= 64);
assert(num_outputs <= 64);
assert(num_inputs >= num_outputs);
// bypass mode
@ -500,8 +500,8 @@ public:
}
void reset() {
for (auto& cursor : cursors_) {
cursor = 0;
for (auto& grant : grants_) {
grant = 0;
}
}
@ -517,8 +517,8 @@ public:
// process inputs
for (uint32_t o = 0; o < O; ++o) {
for (uint32_t r = 0; r < R; ++r) {
uint32_t i = (cursors_.at(o) + r) & (R-1);
uint32_t j = o * R + i;
uint32_t g = (grants_.at(o) + r) & (R-1);
uint32_t j = o * R + g;
if (j >= I)
continue;
@ -527,29 +527,132 @@ public:
auto& req = req_in.front();
Outputs.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
this->update_grant(o, g);
break;
}
}
}
}
private:
protected:
void update_cursor(uint32_t index, uint32_t grant) {
void update_grant(uint32_t index, uint32_t grant) {
if (type_ == ArbiterType::RoundRobin) {
cursors_.at(index) = grant + 1;
grants_.at(index) = grant + 1;
}
}
ArbiterType type_;
uint32_t delay_;
std::vector<uint32_t> cursors_;
std::vector<uint32_t> grants_;
uint32_t num_reqs_;
};
///////////////////////////////////////////////////////////////////////////////
// One-directional crossbar: routes requests from any of `num_inputs` input
// ports to one of `num_outputs` output ports, selected by a bit-field of the
// request address. `Type` must expose `addr` and `tag` members (both are
// accessed in tick()).
//
// Constructor parameters:
//   type        - arbitration policy; only ArbiterType::RoundRobin advances the
//                 per-output scan start (see update_grant()).
//   num_inputs  - number of input ports (<= 64).
//   num_outputs - number of output ports (<= 64, power of two).
//   addr_start  - lowest address bit of the output-select field.
//   delay       - delay passed to SimPort::push() for forwarded requests (!= 0).
template <typename Type>
class CrossBar : public SimObject<CrossBar<Type>> {
public:
  std::vector<SimPort<Type>> Inputs;
  std::vector<SimPort<Type>> Outputs;

  CrossBar(
    const SimContext& ctx,
    const char* name,
    ArbiterType type,
    uint32_t num_inputs,
    uint32_t num_outputs = 1,
    uint32_t addr_start = 0,
    uint32_t delay = 1
  )
    : SimObject<CrossBar<Type>>(ctx, name)
    , Inputs(num_inputs, this)
    , Outputs(num_outputs, this)
    , type_(type)
    , delay_(delay)
    , grants_(num_outputs, 0)
    , lg_num_reqs_(log2ceil(num_inputs))
    , addr_start_(addr_start)
    // Last bit of the output-select field. The field is log2(num_outputs)
    // bits wide and starts at addr_start; the previous value (num_outputs-1)
    // was a mask, not a bit position, so the extracted index could exceed the
    // number of outputs and ignored addr_start.
    // NOTE(review): assumes bit_getw(value, first_bit, last_bit) takes an
    // inclusive bit range - confirm against its definition.
    , addr_end_((num_outputs > 1) ? (addr_start + log2ceil(num_outputs) - 1) : addr_start)
    , collisions_(0) {
    assert(delay != 0);
    assert(num_inputs <= 64);
    assert(num_outputs <= 64);
    assert(ispow2(num_outputs));
  }

  // Restart every output's arbitration scan at input 0.
  void reset() {
    for (auto& grant : grants_) {
      grant = 0;
    }
  }

  // For each output, pick at most one pending request addressed to it and
  // forward it; other pending requests for the same output are counted as
  // collisions and left in their input queues.
  void tick() {
    uint32_t I = Inputs.size();
    uint32_t O = Outputs.size();
    uint32_t R = 1 << lg_num_reqs_;

    // process incoming requests
    for (uint32_t o = 0; o < O; ++o) {
      int32_t input_idx = -1;
      for (uint32_t r = 0; r < R; ++r) {
        // scan inputs starting from this output's current grant position
        uint32_t i = (grants_.at(o) + r) & (R-1);
        if (i >= I)
          continue; // R is rounded up to a power of two; skip padding slots
        auto& req_in = Inputs.at(i);
        if (!req_in.empty()) {
          auto& req = req_in.front();
          // skip if input is not going to this output
          uint32_t output_idx = 0;
          if (O != 1) {
            output_idx = static_cast<uint32_t>(bit_getw(req.addr, addr_start_, addr_end_));
          }
          if (output_idx != o)
            continue;
          if (input_idx != -1) {
            // a request for this output was already selected this tick
            ++collisions_;
            continue;
          }
          input_idx = i;
        }
      }
      if (input_idx != -1) {
        auto& req_in = Inputs.at(input_idx);
        auto& req = req_in.front();
        if (lg_num_reqs_ != 0) {
          // record the source input in the low bits of the tag
          req.tag = (req.tag << lg_num_reqs_) | input_idx;
        }
        DT(4, this->name() << "-req" << input_idx << ": " << req);
        Outputs.at(o).push(req, delay_);
        req_in.pop();
        this->update_grant(o, input_idx);
      }
    }
  }

  // Number of requests that lost arbitration because another input targeted
  // the same output in the same tick (accumulated since construction).
  uint64_t collisions() const {
    return collisions_;
  }

protected:

  // Under round-robin, the next scan for output `index` starts just after the
  // input that was granted; other policies leave the scan start unchanged.
  void update_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      grants_.at(index) = grant + 1;
    }
  }

  ArbiterType type_;
  uint32_t delay_;
  std::vector<uint32_t> grants_; // per-output scan start
  uint32_t lg_num_reqs_;         // log2ceil(num_inputs)
  uint32_t addr_start_;          // first bit of the output-select field
  uint32_t addr_end_;            // last bit of the output-select field
  uint64_t collisions_;
};
///////////////////////////////////////////////////////////////////////////////
template <typename Req, typename Rsp>
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
public:
@ -574,12 +677,12 @@ public:
, RspOut(num_outputs, this)
, type_(type)
, delay_(delay)
, cursors_(num_outputs, 0)
, grants_(num_outputs, 0)
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
{
assert(delay != 0);
assert(num_inputs <= 32);
assert(num_outputs <= 32);
assert(num_inputs <= 64);
assert(num_outputs <= 64);
assert(num_inputs >= num_outputs);
// bypass mode
@ -592,8 +695,8 @@ public:
}
void reset() {
for (auto& cursor : cursors_) {
cursor = 0;
for (auto& grant : grants_) {
grant = 0;
}
}
@ -606,25 +709,28 @@ public:
if (I == O)
return;
// process outgoing responses
for (uint32_t o = 0; o < O; ++o) {
// process incoming responses
if (!RspOut.at(o).empty()) {
auto& rsp = RspOut.at(o).front();
uint32_t i = 0;
auto& rsp_out = RspOut.at(o);
if (!rsp_out.empty()) {
auto& rsp = rsp_out.front();
uint32_t g = 0;
if (lg_num_reqs_ != 0) {
i = rsp.tag & (R-1);
g = rsp.tag & (R-1);
rsp.tag >>= lg_num_reqs_;
}
DT(4, this->name() << " rsp" << o << ": " << rsp);
uint32_t j = o * R + i;
DT(4, this->name() << "-rsp" << o << ": " << rsp);
uint32_t j = o * R + g;
RspIn.at(j).push(rsp, 1);
RspOut.at(o).pop();
rsp_out.pop();
}
}
// process incoming requests
// process incoming requests
for (uint32_t o = 0; o < O; ++o) {
for (uint32_t r = 0; r < R; ++r) {
uint32_t i = (cursors_.at(o) + r) & (R-1);
uint32_t j = o * R + i;
uint32_t g = (grants_.at(o) + r) & (R-1);
uint32_t j = o * R + g;
if (j >= I)
continue;
@ -632,32 +738,193 @@ public:
if (!req_in.empty()) {
auto& req = req_in.front();
if (lg_num_reqs_ != 0) {
req.tag = (req.tag << lg_num_reqs_) | i;
req.tag = (req.tag << lg_num_reqs_) | g;
}
DT(4, this->name() << " req" << j << ": " << req);
DT(4, this->name() << "-req" << j << ": " << req);
ReqOut.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
this->update_grant(o, g);
break;
}
}
}
}
void update_cursor(uint32_t index, uint32_t grant) {
protected:
void update_grant(uint32_t index, uint32_t grant) {
if (type_ == ArbiterType::RoundRobin) {
cursors_.at(index) = grant + 1;
grants_.at(index) = grant + 1;
}
}
private:
ArbiterType type_;
uint32_t delay_;
std::vector<uint32_t> cursors_;
std::vector<uint32_t> grants_;
uint32_t lg_num_reqs_;
};
using MemArbiter = TxArbiter<MemReq, MemRsp>;
///////////////////////////////////////////////////////////////////////////////
// Bidirectional (request/response) crossbar. Requests arriving on ReqIn are
// routed to ReqOut by a bit-field of the request address; responses arriving
// on RspOut are routed back to RspIn using the source-input index that the
// request path stored in the low bits of the tag. `Req` must expose `addr`
// and `tag`; `Rsp` must expose `tag` (all are accessed in tick()).
//
// Constructor parameters:
//   type        - arbitration policy; only ArbiterType::RoundRobin advances the
//                 scan start positions (see update_*_grant()).
//   num_inputs  - number of requester-side ports (<= 64, power of two).
//   num_outputs - number of target-side ports (<= 64, power of two).
//   addr_start  - lowest address bit of the output-select field.
//   delay       - delay passed to SimPort::push() for forwarded requests (!= 0).
template <typename Req, typename Rsp>
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
public:
  std::vector<SimPort<Req>> ReqIn;
  std::vector<SimPort<Rsp>> RspIn;

  std::vector<SimPort<Req>> ReqOut;
  std::vector<SimPort<Rsp>> RspOut;

  TxCrossBar(
    const SimContext& ctx,
    const char* name,
    ArbiterType type,
    uint32_t num_inputs,
    uint32_t num_outputs = 1,
    uint32_t addr_start = 0,
    uint32_t delay = 1
  )
    : SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
    , ReqIn(num_inputs, this)
    , RspIn(num_inputs, this)
    , ReqOut(num_outputs, this)
    , RspOut(num_outputs, this)
    , type_(type)
    , delay_(delay)
    , req_grants_(num_outputs, 0)
    , rsp_grants_(num_inputs, 0)
    , lg_num_reqs_(log2ceil(num_inputs))
    , lg_num_rsps_(log2ceil(num_outputs))
    , addr_start_(addr_start)
    // Last bit of the output-select field. The field is log2(num_outputs)
    // bits wide and starts at addr_start; the previous value (num_outputs-1)
    // was a mask, not a bit position, so the extracted index could exceed the
    // number of outputs and ignored addr_start.
    // NOTE(review): assumes bit_getw(value, first_bit, last_bit) takes an
    // inclusive bit range - confirm against its definition.
    , addr_end_((num_outputs > 1) ? (addr_start + log2ceil(num_outputs) - 1) : addr_start)
    , collisions_(0) {
    assert(delay != 0);
    assert(num_inputs <= 64);
    assert(num_outputs <= 64);
    assert(ispow2(num_inputs));
    assert(ispow2(num_outputs));
  }

  // Restart all request and response arbitration scans at index 0.
  void reset() {
    for (auto& grant : req_grants_) {
      grant = 0;
    }
    for (auto& grant : rsp_grants_) {
      grant = 0;
    }
  }

  // Responses are routed first, then requests. In each direction at most one
  // item is forwarded per destination port per tick; the remaining contenders
  // are counted as collisions and stay queued.
  void tick() {
    uint32_t I = ReqIn.size();
    uint32_t O = ReqOut.size();
    uint32_t R = 1 << lg_num_reqs_;
    uint32_t T = 1 << lg_num_rsps_;

    // process outgoing responses
    for (uint32_t i = 0; i < I; ++i) {
      int32_t output_idx = -1;
      for (uint32_t t = 0; t < T; ++t) {
        // scan outputs starting from this input's current grant position
        uint32_t o = (rsp_grants_.at(i) + t) & (T-1);
        if (o >= O)
          continue;
        auto& rsp_out = RspOut.at(o);
        if (!rsp_out.empty()) {
          auto& rsp = rsp_out.front();
          // skip if response is not going to current input
          uint32_t input_idx = 0;
          if (lg_num_reqs_ != 0) {
            input_idx = rsp.tag & (R-1);
          }
          if (input_idx != i)
            continue;
          if (output_idx != -1) {
            // a response for this input was already selected this tick
            ++collisions_;
            continue;
          }
          output_idx = o;
        }
      }
      if (output_idx != -1) {
        auto& rsp_out = RspOut.at(output_idx);
        auto& rsp = rsp_out.front();
        uint32_t input_idx = 0;
        if (lg_num_reqs_ != 0) {
          // strip the source-input index added on the request path
          input_idx = rsp.tag & (R-1);
          rsp.tag >>= lg_num_reqs_;
        }
        DT(4, this->name() << "-rsp" << output_idx << ": " << rsp);
        RspIn.at(input_idx).push(rsp, 1);
        rsp_out.pop();
        this->update_rsp_grant(i, output_idx);
      }
    }

    // process incoming requests
    for (uint32_t o = 0; o < O; ++o) {
      int32_t input_idx = -1;
      for (uint32_t r = 0; r < R; ++r) {
        // scan inputs starting from this output's current grant position
        uint32_t i = (req_grants_.at(o) + r) & (R-1);
        if (i >= I)
          continue;
        auto& req_in = ReqIn.at(i);
        if (!req_in.empty()) {
          auto& req = req_in.front();
          // skip if request is not going to current output
          uint32_t output_idx = 0;
          if (O != 1) {
            output_idx = static_cast<uint32_t>(bit_getw(req.addr, addr_start_, addr_end_));
          }
          if (output_idx != o)
            continue;
          if (input_idx != -1) {
            // a request for this output was already selected this tick
            ++collisions_;
            continue;
          }
          input_idx = i;
        }
      }
      if (input_idx != -1) {
        auto& req_in = ReqIn.at(input_idx);
        auto& req = req_in.front();
        if (lg_num_reqs_ != 0) {
          // record the source input in the low bits of the tag so the
          // response path can route the reply back
          req.tag = (req.tag << lg_num_reqs_) | input_idx;
        }
        DT(4, this->name() << "-req" << input_idx << ": " << req);
        ReqOut.at(o).push(req, delay_);
        req_in.pop();
        this->update_req_grant(o, input_idx);
      }
    }
  }

  // Number of requests and responses (combined) that lost arbitration because
  // another item targeted the same destination port in the same tick.
  uint64_t collisions() const {
    return collisions_;
  }

protected:

  // Under round-robin, the next request scan for output `index` starts just
  // after the granted input; other policies leave the scan start unchanged.
  void update_req_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      req_grants_.at(index) = grant + 1;
    }
  }

  // Under round-robin, the next response scan for input `index` starts just
  // after the granted output; other policies leave the scan start unchanged.
  void update_rsp_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      rsp_grants_.at(index) = grant + 1;
    }
  }

  ArbiterType type_;
  uint32_t delay_;
  std::vector<uint32_t> req_grants_; // per-output request scan start
  std::vector<uint32_t> rsp_grants_; // per-input response scan start
  uint32_t lg_num_reqs_;             // log2ceil(num_inputs)
  uint32_t lg_num_rsps_;             // log2ceil(num_outputs)
  uint32_t addr_start_;              // first bit of the output-select field
  uint32_t addr_end_;                // last bit of the output-select field
  uint64_t collisions_;
};
///////////////////////////////////////////////////////////////////////////////
@ -711,4 +978,6 @@ private:
uint32_t delay_;
};
using MemArbiter = TxArbiter<MemReq, MemRsp>;
using MemCrossBar = TxCrossBar<MemReq, MemRsp>;
}