mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
30b0daf050
commit
3ace9bbeda
18 changed files with 476 additions and 178 deletions
|
@ -105,7 +105,7 @@ regression()
|
|||
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
|
||||
|
||||
# test for matmul
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
@ -322,6 +322,10 @@ config2()
|
|||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test memory ports
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo --threads=32
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
|
|
|
@ -648,9 +648,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L1_MEM_PORTS
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_MEM_PORTS `L2_MEM_PORTS
|
||||
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L1_MEM_PORTS `MIN(`L2_MEM_PORTS, `DCACHE_NUM_BANKS)
|
||||
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
@ -727,9 +727,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L2_MEM_PORTS
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_MEM_PORTS `MIN(`L3_MEM_PORTS, `L2_NUM_BANKS)
|
||||
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L2_MEM_PORTS `L3_MEM_PORTS
|
||||
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
@ -788,9 +788,9 @@
|
|||
// Number of Memory Ports
|
||||
`ifndef L3_MEM_PORTS
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_MEM_PORTS `MIN(`PLATFORM_MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L3_MEM_PORTS `PLATFORM_MEMORY_BANKS
|
||||
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -27,9 +27,9 @@ class SimObjectBase;
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimPortBase {
|
||||
public:
|
||||
public:
|
||||
virtual ~SimPortBase() {}
|
||||
|
||||
|
||||
SimObjectBase* module() const {
|
||||
return module_;
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ public:
|
|||
auto cycles = queue_.front().cycles;
|
||||
queue_.pop();
|
||||
return cycles;
|
||||
}
|
||||
}
|
||||
|
||||
void tx_callback(const TxCallback& callback) {
|
||||
tx_cb_ = callback;
|
||||
|
@ -137,7 +137,7 @@ public:
|
|||
typedef std::shared_ptr<SimEventBase> Ptr;
|
||||
|
||||
virtual ~SimEventBase() {}
|
||||
|
||||
|
||||
virtual void fire() const = 0;
|
||||
|
||||
uint64_t cycles() const {
|
||||
|
@ -161,7 +161,7 @@ public:
|
|||
|
||||
typedef std::function<void (const Pkt&)> Func;
|
||||
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
|
@ -194,8 +194,8 @@ public:
|
|||
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
|
||||
}
|
||||
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, port_(port)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
|
@ -209,7 +209,7 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
const SimPort<Pkt>* port_;
|
||||
const SimPort<Pkt>* port_;
|
||||
Pkt pkt_;
|
||||
|
||||
static MemoryPool<SimPortEvent<Pkt>> allocator_;
|
||||
|
@ -230,11 +230,11 @@ public:
|
|||
|
||||
const std::string& name() const {
|
||||
return name_;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
SimObjectBase(const SimContext& ctx, const char* name);
|
||||
SimObjectBase(const SimContext& ctx, const std::string& name);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -259,8 +259,8 @@ public:
|
|||
|
||||
protected:
|
||||
|
||||
SimObject(const SimContext& ctx, const char* name)
|
||||
: SimObjectBase(ctx, name)
|
||||
SimObject(const SimContext& ctx, const std::string& name)
|
||||
: SimObjectBase(ctx, name)
|
||||
{}
|
||||
|
||||
private:
|
||||
|
@ -283,9 +283,9 @@ private:
|
|||
};
|
||||
|
||||
class SimContext {
|
||||
private:
|
||||
private:
|
||||
SimContext() {}
|
||||
|
||||
|
||||
friend class SimPlatform;
|
||||
};
|
||||
|
||||
|
@ -320,10 +320,10 @@ public:
|
|||
|
||||
template <typename Pkt>
|
||||
void schedule(const typename SimCallEvent<Pkt>::Func& callback,
|
||||
const Pkt& pkt,
|
||||
uint64_t delay) {
|
||||
const Pkt& pkt,
|
||||
uint64_t delay) {
|
||||
assert(delay != 0);
|
||||
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
|
||||
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
|
||||
events_.emplace_back(evt);
|
||||
}
|
||||
|
||||
|
@ -341,10 +341,10 @@ public:
|
|||
auto evt_it_end = events_.end();
|
||||
while (evt_it != evt_it_end) {
|
||||
auto& event = *evt_it;
|
||||
if (cycles_ >= event->cycles()) {
|
||||
if (cycles_ >= event->cycles()) {
|
||||
event->fire();
|
||||
evt_it = events_.erase(evt_it);
|
||||
} else {
|
||||
} else {
|
||||
++evt_it;
|
||||
}
|
||||
}
|
||||
|
@ -352,7 +352,7 @@ public:
|
|||
for (auto& object : objects_) {
|
||||
object->do_tick();
|
||||
}
|
||||
// advance clock
|
||||
// advance clock
|
||||
++cycles_;
|
||||
}
|
||||
|
||||
|
@ -390,8 +390,8 @@ private:
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline SimObjectBase::SimObjectBase(const SimContext&, const char* name)
|
||||
: name_(name)
|
||||
inline SimObjectBase::SimObjectBase(const SimContext&, const std::string& name)
|
||||
: name_(name)
|
||||
{}
|
||||
|
||||
template <typename Impl>
|
||||
|
@ -403,8 +403,8 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
|
|||
template <typename Pkt>
|
||||
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
|
||||
if (peer_ && !tx_cb_) {
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
|
||||
} else {
|
||||
SimPlatform::instance().schedule(this, pkt, delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -47,7 +47,7 @@ public:
|
|||
, indent_(indent, ' ')
|
||||
, owner_(nullptr)
|
||||
{}
|
||||
|
||||
|
||||
explicit IndentStream(std::ostream& dest, int indent = 4)
|
||||
: dest_(dest.rdbuf())
|
||||
, isBeginLine_(true)
|
||||
|
@ -76,3 +76,14 @@ private:
|
|||
std::string indent_;
|
||||
std::ostream* owner_;
|
||||
};
|
||||
|
||||
/// Builds a std::string from a printf-style format string.
///
/// The arguments are handed to std::snprintf unchanged, so each one must be a
/// type that is valid in a C variadic call and must match its conversion
/// specifier in `fmt` (e.g. pass `s.c_str()`, not a std::string, for "%s").
///
/// @param fmt   printf-style format string.
/// @param args  values consumed by the conversion specifiers in `fmt`.
/// @return      the formatted text, without a trailing NUL.
/// @throws std::runtime_error if std::snprintf reports a formatting error.
template <typename... Args>
std::string StrFormat(const std::string& fmt, Args... args) {
  // First pass: measure only. snprintf returns the number of characters that
  // would be written (terminator excluded) or a negative value on error.
  // The raw return value is tested before any arithmetic is done on it.
  const int length = std::snprintf(nullptr, 0, fmt.c_str(), args...);
  if (length < 0) {
    throw std::runtime_error("Error during formatting.");
  }

  // Second pass: format straight into the result's own storage instead of a
  // temporary buffer. snprintf also writes the terminating NUL at
  // data()[size()], which is where std::string already keeps its own NUL.
  std::string result(static_cast<std::size_t>(length), '\0');
  const int written = std::snprintf(result.data(), result.size() + 1, fmt.c_str(), args...);
  if (written != length) {
    // The two passes must agree; anything else means the formatting failed.
    throw std::runtime_error("Error during formatting.");
  }
  return result;
}
|
|
@ -430,7 +430,7 @@ public:
|
|||
continue;
|
||||
|
||||
auto& mem_rsp = mem_rsp_port.front();
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " fill-rsp: " << mem_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-fill-rsp: " << mem_rsp);
|
||||
pipeline_req.type = bank_req_t::Fill;
|
||||
pipeline_req.tag = mem_rsp.tag;
|
||||
mem_rsp_port.pop();
|
||||
|
@ -495,7 +495,7 @@ public:
|
|||
bank_req.type = bank_req_t::Core;
|
||||
bank_req.write = core_req.write;
|
||||
pipeline_req = bank_req;
|
||||
DT(3, simobject_->name() << " core-req: " << core_req);
|
||||
DT(3, simobject_->name() << "-core-req: " << core_req);
|
||||
}
|
||||
|
||||
if (core_req.write)
|
||||
|
@ -523,7 +523,7 @@ private:
|
|||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
|
||||
}
|
||||
|
||||
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
|
||||
|
@ -532,13 +532,13 @@ private:
|
|||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
uint32_t mem_port = req_id % config_.mem_ports;
|
||||
nc_arbs_.at(mem_port)->ReqIn.at(1).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bypass-dram-req: " << mem_req);
|
||||
}
|
||||
|
||||
if (core_req.write && config_.write_reponse) {
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
|
||||
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -568,7 +568,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " replay: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-replay: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
@ -612,7 +612,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
|
||||
} else {
|
||||
// mark line as dirty
|
||||
hit_line.dirty = true;
|
||||
|
@ -625,7 +625,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -644,7 +644,7 @@ private:
|
|||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writeback: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writeback: " << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
}
|
||||
|
@ -658,7 +658,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
|
@ -667,7 +667,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -676,7 +676,7 @@ private:
|
|||
|
||||
// allocate MSHR
|
||||
auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? free_line_id : repl_line_id);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-mshr-enqueue: " << pipeline_req);
|
||||
|
||||
// send fill request
|
||||
if (!mshr_pending) {
|
||||
|
@ -687,7 +687,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " fill: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-fill: " << mem_req);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
ProcessorImpl* processor,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "cluster")
|
||||
: SimObject(ctx, StrFormat("cluster%d", cluster_id))
|
||||
, mem_req_ports(L2_MEM_PORTS, this)
|
||||
, mem_rsp_ports(L2_MEM_PORTS, this)
|
||||
, cluster_id_(cluster_id)
|
||||
|
@ -42,7 +42,7 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
|
||||
// Create l2cache
|
||||
|
||||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||
snprintf(sname, 100, "%s-l2cache", this->name().c_str());
|
||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||
!L2_ENABLED,
|
||||
log2ceil(L2_CACHE_SIZE),// C
|
||||
|
|
|
@ -34,8 +34,8 @@ inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
|
|||
|
||||
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
|
||||
|
||||
inline constexpr int L2_NUM_REQS = 2;
|
||||
inline constexpr int L2_NUM_REQS = NUM_SOCKETS * L1_MEM_PORTS;
|
||||
|
||||
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS;
|
||||
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS * L2_MEM_PORTS;
|
||||
|
||||
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;
|
|
@ -30,7 +30,7 @@ Core::Core(const SimContext& ctx,
|
|||
Socket* socket,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "core")
|
||||
: SimObject(ctx, StrFormat("core%d", core_id))
|
||||
, icache_req_ports(1, this)
|
||||
, icache_rsp_ports(1, this)
|
||||
, dcache_req_ports(DCACHE_NUM_REQS, this)
|
||||
|
@ -59,12 +59,12 @@ Core::Core(const SimContext& ctx,
|
|||
|
||||
// create the memory coalescer
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-coalescer%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-coalescer%d", this->name().c_str(), i);
|
||||
mem_coalescers_.at(i) = MemCoalescer::Create(sname, LSU_CHANNELS, DCACHE_CHANNELS, DCACHE_WORD_SIZE, LSUQ_OUT_SIZE, 1);
|
||||
}
|
||||
|
||||
// create local memory
|
||||
snprintf(sname, 100, "core%d-local_mem", core_id);
|
||||
snprintf(sname, 100, "%s-local_mem", this->name().c_str());
|
||||
local_mem_ = LocalMem::Create(sname, LocalMem::Config{
|
||||
(1 << LMEM_LOG_SIZE),
|
||||
LSU_WORD_SIZE,
|
||||
|
@ -75,19 +75,19 @@ Core::Core(const SimContext& ctx,
|
|||
|
||||
// create lsu demux
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-lsu_demux%d", this->name().c_str(), i);
|
||||
lsu_demux_.at(i) = LocalMemSwitch::Create(sname, 1);
|
||||
}
|
||||
|
||||
// create lsu dcache adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-lsu_dcache_adapter%d", this->name().c_str(), i);
|
||||
lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1);
|
||||
}
|
||||
|
||||
// create lsu lmem adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-lsu_lmem_adapter%d", this->name().c_str(), i);
|
||||
lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1);
|
||||
}
|
||||
|
||||
|
@ -140,7 +140,7 @@ Core::Core(const SimContext& ctx,
|
|||
|
||||
// bind commit arbiters
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-commit-arb%d", this->name().c_str(), i);
|
||||
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
|
||||
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
|
||||
|
|
|
@ -103,7 +103,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
auto reg = instr.getRSrc(i);
|
||||
switch (type) {
|
||||
case RegType::Integer:
|
||||
DPH(2, "Src" << i << " Reg: " << type << reg << "={");
|
||||
DPH(2, "Src" << i << "-Reg: " << type << reg << "={");
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (t) DPN(2, ", ");
|
||||
if (!warp.tmask.test(t)) {
|
||||
|
@ -116,7 +116,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
DPN(2, "}" << std::endl);
|
||||
break;
|
||||
case RegType::Float:
|
||||
DPH(2, "Src" << i << " Reg: " << type << reg << "={");
|
||||
DPH(2, "Src" << i << "-Reg: " << type << reg << "={");
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
if (t) DPN(2, ", ");
|
||||
if (!warp.tmask.test(t)) {
|
||||
|
@ -1421,7 +1421,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} break;
|
||||
case Opcode::TCU:
|
||||
case Opcode::TCU:
|
||||
{ //TODO - make it data-type flexible
|
||||
uint32_t mem_bytes = 1;
|
||||
DP(3, "mem_bytes=" << mem_bytes << std::endl);
|
||||
|
@ -1443,7 +1443,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
|
||||
//LOAD
|
||||
if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp)
|
||||
{
|
||||
{
|
||||
num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp;
|
||||
num_data_per_thread = 1;
|
||||
}
|
||||
|
@ -1456,7 +1456,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
|
||||
//STORE
|
||||
if(num_threads > tc_size*tc_size*TC_per_warp)
|
||||
{
|
||||
{
|
||||
num_threads_actv_st = tc_size*tc_size*TC_per_warp;
|
||||
num_data_per_thread_st = 1;
|
||||
}
|
||||
|
@ -1466,30 +1466,30 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc;
|
||||
}
|
||||
data_bytes_store = mem_bytes*num_data_per_thread_st;
|
||||
|
||||
|
||||
DP(3, "Num Tiles=" << n_tiles << std::endl);
|
||||
|
||||
|
||||
switch (func3) {
|
||||
case 0:
|
||||
{ //Matrix Load
|
||||
case 0:
|
||||
{ //Matrix Load
|
||||
|
||||
DP (4, "TCU LOAD");
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::TCU_LOAD;
|
||||
|
||||
|
||||
trace->src_regs[0] = {RegType::Integer, rsrc0};
|
||||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
DP(3, "Thread ID" << t);
|
||||
DP(3, "Thread ID" << t);
|
||||
|
||||
uint32_t base_addr = rsdata[t][0].i ;
|
||||
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load};
|
||||
|
||||
|
||||
//Load A or B (depends on immsrc)
|
||||
int loop_offset = 0;
|
||||
DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <<std::endl);
|
||||
|
@ -1502,10 +1502,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]);
|
||||
}
|
||||
}
|
||||
rd_write = true;
|
||||
rd_write = true;
|
||||
} break;
|
||||
case 1:
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
DP(4, "TCU STORE");
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::TCU_STORE;
|
||||
|
@ -1513,12 +1513,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
|
||||
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
DP(3, "Thread ID" << t);
|
||||
uint32_t base_addr = rsdata[t][0].i ;
|
||||
|
||||
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store};
|
||||
|
@ -1529,7 +1529,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0));
|
||||
*temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n];
|
||||
|
||||
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
|
||||
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
|
||||
}
|
||||
}
|
||||
//Clear the scratchpad
|
||||
|
@ -1539,18 +1539,18 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 2:
|
||||
{ //Matrix Multiply
|
||||
DP(4, "TCU MULTIPLY MAT");
|
||||
trace->fu_type = FUType::TCU;
|
||||
trace->tcu_type = TCUType::TCU_MUL;
|
||||
uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp);
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
//TC operation [only 1 thread in 1 warp needs to do this]
|
||||
if (t%threads_per_tc == 0)
|
||||
{
|
||||
|
@ -1563,7 +1563,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size;
|
||||
uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2;
|
||||
for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation?
|
||||
{
|
||||
{
|
||||
for (int i = 0; i < tc_size; i++) { //ROW-1
|
||||
for (int j = 0; j < tc_size; j++) { //COL-2
|
||||
int sum = 0;
|
||||
|
|
|
@ -121,7 +121,7 @@ void LsuUnit::tick() {
|
|||
continue;
|
||||
auto& state = states_.at(b);
|
||||
auto& lsu_rsp = lsu_rsp_port.front();
|
||||
DT(3, this->name() << " mem-rsp: " << lsu_rsp);
|
||||
DT(3, this->name() << "-mem-rsp: " << lsu_rsp);
|
||||
auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
assert(!entry.mask.none());
|
||||
|
@ -146,7 +146,7 @@ void LsuUnit::tick() {
|
|||
continue;
|
||||
Outputs.at(iw).push(state.fence_trace, 1);
|
||||
state.fence_lock = false;
|
||||
DT(3, this->name() << " fence-unlock: " << state.fence_trace);
|
||||
DT(3, this->name() << "-fence-unlock: " << state.fence_trace);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
|
@ -160,7 +160,7 @@ void LsuUnit::tick() {
|
|||
// schedule fence lock
|
||||
state.fence_trace = trace;
|
||||
state.fence_lock = true;
|
||||
DT(3, this->name() << " fence-lock: " << *trace);
|
||||
DT(3, this->name() << "-fence-lock: " << *trace);
|
||||
// remove input
|
||||
input.pop();
|
||||
continue;
|
||||
|
@ -171,7 +171,7 @@ void LsuUnit::tick() {
|
|||
// check pending queue capacity
|
||||
if (!is_write && state.pending_rd_reqs.full()) {
|
||||
if (!trace->log_once(true)) {
|
||||
DT(4, "*** " << this->name() << " queue-full: " << *trace);
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << *trace);
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
|
@ -202,7 +202,7 @@ void LsuUnit::tick() {
|
|||
|
||||
// send memory request
|
||||
core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req);
|
||||
DT(3, this->name() << " mem-req: " << lsu_req);
|
||||
DT(3, this->name() << "-mem-req: " << lsu_req);
|
||||
|
||||
// update stats
|
||||
auto num_addrs = lsu_req.mask.count();
|
||||
|
@ -237,7 +237,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
{
|
||||
req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4);
|
||||
}
|
||||
|
||||
|
||||
auto t0 = trace->pid * NUM_LSU_LANES;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
|
||||
|
@ -250,7 +250,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto type = get_addr_type(mem_addr.addr);
|
||||
// DT(3, "addr_type = " << type << ", " << *trace);
|
||||
// DT(3, "addr_type = " << type << ", " << *trace);
|
||||
uint32_t mem_bytes = 1;
|
||||
for (int i = 0; i < req_per_thread; i++)
|
||||
{
|
||||
|
@ -261,7 +261,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
mem_req.tag = tag;
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
|
||||
dcache_req_port.push(mem_req, 1);
|
||||
DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
@ -272,7 +272,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
++core_->perf_stats_.loads;
|
||||
++pending_loads_;
|
||||
}
|
||||
|
||||
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
@ -282,7 +282,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
|
||||
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
|
||||
: FuncUnit(ctx, core, "TCU")
|
||||
{}
|
||||
|
||||
|
@ -290,7 +290,7 @@ void TcuUnit::tick() {
|
|||
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
|
@ -307,7 +307,7 @@ void TcuUnit::tick() {
|
|||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace);
|
||||
input.pop();
|
||||
}
|
||||
|
|
|
@ -24,8 +24,7 @@ protected:
|
|||
LocalMem* simobject_;
|
||||
Config config_;
|
||||
RAM ram_;
|
||||
int32_t bank_sel_addr_start_;
|
||||
int32_t bank_sel_addr_end_;
|
||||
MemCrossBar::Ptr mem_xbar_;
|
||||
PerfStats perf_stats_;
|
||||
|
||||
uint64_t to_local_addr(uint64_t addr) {
|
||||
|
@ -40,9 +39,15 @@ public:
|
|||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, ram_(config.capacity)
|
||||
, bank_sel_addr_start_(0)
|
||||
, bank_sel_addr_end_(config.B-1)
|
||||
{}
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_reqs, (1 << config.B));
|
||||
for (uint32_t i = 0; i < config.num_reqs; ++i) {
|
||||
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~Impl() {}
|
||||
|
||||
|
@ -82,7 +87,7 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
DT(4, simobject_->name() << " mem-req" << req_id << ": "<< core_req);
|
||||
DT(4, simobject_->name() << "-mem-req" << req_id << ": "<< core_req);
|
||||
|
||||
in_used_banks.at(bank_id) = true;
|
||||
|
||||
|
|
|
@ -42,10 +42,10 @@ void MemCoalescer::reset() {
|
|||
}
|
||||
|
||||
void MemCoalescer::tick() {
|
||||
// process incoming responses
|
||||
// process outgoing responses
|
||||
if (!RspOut.empty()) {
|
||||
auto& out_rsp = RspOut.front();
|
||||
DT(4, this->name() << " mem-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-mem-rsp: " << out_rsp);
|
||||
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
|
||||
|
||||
BitVector<> rsp_mask(input_size_);
|
||||
|
@ -89,7 +89,7 @@ void MemCoalescer::tick() {
|
|||
|
||||
// ensure we can allocate a response tag
|
||||
if (pending_rd_reqs_.full()) {
|
||||
DT(4, "*** " << this->name() << " queue-full: " << in_req);
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -145,7 +145,7 @@ void MemCoalescer::tick() {
|
|||
|
||||
// send memory request
|
||||
ReqOut.push(out_req, delay_);
|
||||
DT(4, this->name() << " mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
|
||||
DT(4, this->name() << "-mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
|
||||
|
||||
// update sent mask
|
||||
sent_mask_ |= cur_mask;
|
||||
|
|
|
@ -27,13 +27,14 @@ class MemSim::Impl {
|
|||
private:
|
||||
MemSim* simobject_;
|
||||
Config config_;
|
||||
MemCrossBar::Ptr mem_xbar_;
|
||||
DramSim dram_sim_;
|
||||
PerfStats perf_stats_;
|
||||
|
||||
struct DramCallbackArgs {
|
||||
MemSim* simobject;
|
||||
MemReq request;
|
||||
uint32_t i;
|
||||
MemSim::Impl* memsim;
|
||||
MemReq request;
|
||||
uint32_t bank_id;
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -41,7 +42,15 @@ public:
|
|||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
{}
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
|
||||
for (uint32_t i = 0; i < config.num_ports; ++i) {
|
||||
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
//--
|
||||
|
@ -59,14 +68,14 @@ public:
|
|||
dram_sim_.tick();
|
||||
uint32_t counter = 0;
|
||||
|
||||
for (uint32_t i = 0; i < config_.channels; ++i) {
|
||||
if (simobject_->MemReqPorts.at(i).empty())
|
||||
for (uint32_t i = 0; i < config_.num_banks; ++i) {
|
||||
if (mem_xbar_->ReqOut.at(i).empty())
|
||||
continue;
|
||||
|
||||
auto& mem_req = simobject_->MemReqPorts.at(i).front();
|
||||
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
|
||||
|
||||
// try to enqueue the request to the memory system
|
||||
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
|
||||
auto req_args = new DramCallbackArgs{this, mem_req, i};
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
mem_req.addr,
|
||||
|
@ -76,8 +85,8 @@ public:
|
|||
// only send a response for read requests
|
||||
if (!rsp_args->request.write) {
|
||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp);
|
||||
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp: bank=" << rsp_args->bank_id << ", " << mem_rsp);
|
||||
}
|
||||
delete rsp_args;
|
||||
},
|
||||
|
@ -90,9 +99,9 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req);
|
||||
DT(3, simobject_->name() << "-mem-req: bank=" << i << ", " << mem_req);
|
||||
|
||||
simobject_->MemReqPorts.at(i).pop();
|
||||
mem_xbar_->ReqOut.at(i).pop();
|
||||
counter++;
|
||||
}
|
||||
|
||||
|
@ -107,8 +116,8 @@ public:
|
|||
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<MemSim>(ctx, name)
|
||||
, MemReqPorts(config.channels, this)
|
||||
, MemRspPorts(config.channels, this)
|
||||
, MemReqPorts(config.num_ports, this)
|
||||
, MemRspPorts(config.num_ports, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,15 +21,15 @@ namespace vortex {
|
|||
class MemSim : public SimObject<MemSim>{
|
||||
public:
|
||||
struct Config {
|
||||
uint32_t channels;
|
||||
uint32_t num_cores;
|
||||
uint32_t num_banks;
|
||||
uint32_t num_ports;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t counter;
|
||||
uint64_t ticks;
|
||||
|
||||
PerfStats()
|
||||
PerfStats()
|
||||
: counter(0)
|
||||
, ticks(0)
|
||||
{}
|
||||
|
@ -52,7 +52,7 @@ public:
|
|||
void tick();
|
||||
|
||||
const PerfStats& perf_stats() const;
|
||||
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
|
|
|
@ -25,7 +25,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
PLATFORM_MEMORY_BANKS,
|
||||
uint32_t(arch.num_cores()) * arch.num_clusters()
|
||||
L3_MEM_PORTS
|
||||
});
|
||||
|
||||
// create clusters
|
||||
|
|
|
@ -21,7 +21,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "socket")
|
||||
: SimObject(ctx, StrFormat("socket%d", socket_id))
|
||||
, mem_req_ports(L1_MEM_PORTS, this)
|
||||
, mem_rsp_ports(L1_MEM_PORTS, this)
|
||||
, socket_id_(socket_id)
|
||||
|
@ -31,7 +31,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
auto cores_per_socket = cores_.size();
|
||||
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "socket%d-icaches", socket_id);
|
||||
snprintf(sname, 100, "%s-icaches", this->name().c_str());
|
||||
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, CacheSim::Config{
|
||||
!ICACHE_ENABLED,
|
||||
log2ceil(ICACHE_SIZE), // C
|
||||
|
@ -49,7 +49,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
2, // pipeline latency
|
||||
});
|
||||
|
||||
snprintf(sname, 100, "socket%d-dcaches", socket_id);
|
||||
snprintf(sname, 100, "%s-dcaches", this->name().c_str());
|
||||
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, CacheSim::Config{
|
||||
!DCACHE_ENABLED,
|
||||
log2ceil(DCACHE_SIZE), // C
|
||||
|
@ -70,7 +70,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
// connect l1 caches to outgoing memory interfaces
|
||||
for (uint32_t i = 0; i < L1_MEM_PORTS; ++i) {
|
||||
if (i == 0) {
|
||||
snprintf(sname, 100, "socket%d-l1_arb%d", socket_id, i);
|
||||
snprintf(sname, 100, "%s-l1_arb%d", this->name().c_str(), i);
|
||||
auto l1_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, 2, 1);
|
||||
|
||||
icaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(1));
|
||||
|
@ -82,8 +82,8 @@ Socket::Socket(const SimContext& ctx,
|
|||
l1_arb->ReqOut.at(0).bind(&this->mem_req_ports.at(0));
|
||||
this->mem_rsp_ports.at(0).bind(&l1_arb->RspOut.at(0));
|
||||
} else {
|
||||
this->mem_req_ports.at(i).bind(&dcaches_->MemReqPorts.at(i));
|
||||
dcaches_->MemRspPorts.at(i).bind(&this->mem_rsp_ports.at(i));
|
||||
dcaches_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
|
||||
this->mem_rsp_ports.at(i).bind(&dcaches_->MemRspPorts.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,16 +32,16 @@ LocalMemSwitch::LocalMemSwitch(
|
|||
void LocalMemSwitch::reset() {}
|
||||
|
||||
void LocalMemSwitch::tick() {
|
||||
// process incoming responses
|
||||
// process outgoing responses
|
||||
if (!RspLmem.empty()) {
|
||||
auto& out_rsp = RspLmem.front();
|
||||
DT(4, this->name() << " lmem-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-lmem-rsp: " << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspLmem.pop();
|
||||
}
|
||||
if (!RspDC.empty()) {
|
||||
auto& out_rsp = RspDC.front();
|
||||
DT(4, this->name() << " dc-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-dc-rsp: " << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspDC.pop();
|
||||
}
|
||||
|
@ -73,12 +73,12 @@ void LocalMemSwitch::tick() {
|
|||
|
||||
if (!out_dc_req.mask.none()) {
|
||||
ReqDC.push(out_dc_req, delay_);
|
||||
DT(4, this->name() << " dc-req: " << out_dc_req);
|
||||
DT(4, this->name() << "-dc-req: " << out_dc_req);
|
||||
}
|
||||
|
||||
if (!out_lmem_req.mask.none()) {
|
||||
ReqLmem.push(out_lmem_req, delay_);
|
||||
DT(4, this->name() << " lmem-req: " << out_lmem_req);
|
||||
DT(4, this->name() << "-lmem-req: " << out_lmem_req);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
|
@ -104,12 +104,12 @@ void LsuMemAdapter::reset() {}
|
|||
void LsuMemAdapter::tick() {
|
||||
uint32_t input_size = ReqOut.size();
|
||||
|
||||
// process incoming responses
|
||||
// process outgoing responses
|
||||
for (uint32_t i = 0; i < input_size; ++i) {
|
||||
if (RspOut.at(i).empty())
|
||||
continue;
|
||||
auto& out_rsp = RspOut.at(i).front();
|
||||
DT(4, this->name() << " rsp" << i << ": " << out_rsp);
|
||||
DT(4, this->name() << "-rsp" << i << ": " << out_rsp);
|
||||
|
||||
// build memory response
|
||||
LsuRsp in_rsp(input_size);
|
||||
|
@ -155,7 +155,7 @@ void LsuMemAdapter::tick() {
|
|||
|
||||
// send memory request
|
||||
ReqOut.at(i).push(out_req, delay_);
|
||||
DT(4, this->name() << " req" << i << ": " << out_req);
|
||||
DT(4, this->name() << "-req" << i << ": " << out_req);
|
||||
}
|
||||
}
|
||||
ReqIn.pop();
|
||||
|
|
341
sim/simx/types.h
341
sim/simx/types.h
|
@ -483,12 +483,12 @@ public:
|
|||
, Outputs(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursors_(num_outputs, 0)
|
||||
, grants_(num_outputs, 0)
|
||||
, num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
// bypass mode
|
||||
|
@ -500,8 +500,8 @@ public:
|
|||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -517,8 +517,8 @@ public:
|
|||
// process inputs
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
uint32_t g = (grants_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + g;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
|
@ -527,29 +527,132 @@ public:
|
|||
auto& req = req_in.front();
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
this->update_grant(o, g);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursors_.at(index) = grant + 1;
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t num_reqs_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Type>
|
||||
class CrossBar : public SimObject<CrossBar<Type>> {
|
||||
public:
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
CrossBar(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<CrossBar<Type>>(ctx, name)
|
||||
, Inputs(num_inputs, this)
|
||||
, Outputs(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, grants_(num_outputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs))
|
||||
, addr_start_(addr_start)
|
||||
, addr_end_(num_outputs-1)
|
||||
, collisions_(0) {
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_outputs));
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = Inputs.size();
|
||||
uint32_t O = Outputs.size();
|
||||
uint32_t R = 1 << lg_num_reqs_;
|
||||
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
int32_t input_idx = -1;
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (grants_.at(o) + r) & (R-1);
|
||||
if (i >= I)
|
||||
continue;
|
||||
auto& req_in = Inputs.at(i);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
// skip if input is not going to this output
|
||||
uint32_t output_idx = 0;
|
||||
if (O != 1) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_end_);
|
||||
}
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
if (input_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
input_idx = i;
|
||||
}
|
||||
}
|
||||
if (input_idx != -1) {
|
||||
auto& req_in = Inputs.at(input_idx);
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | input_idx;
|
||||
}
|
||||
DT(4, this->name() << "-req" << input_idx << ": " << req);
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_grant(o, input_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t collisions() const {
|
||||
return collisions_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg_num_reqs_;
|
||||
uint32_t addr_start_;
|
||||
uint32_t addr_end_;
|
||||
uint64_t collisions_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
||||
public:
|
||||
|
@ -574,12 +677,12 @@ public:
|
|||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursors_(num_outputs, 0)
|
||||
, grants_(num_outputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
// bypass mode
|
||||
|
@ -592,8 +695,8 @@ public:
|
|||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -606,25 +709,28 @@ public:
|
|||
if (I == O)
|
||||
return;
|
||||
|
||||
// process outgoing responses
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
// process incoming responses
|
||||
if (!RspOut.at(o).empty()) {
|
||||
auto& rsp = RspOut.at(o).front();
|
||||
uint32_t i = 0;
|
||||
auto& rsp_out = RspOut.at(o);
|
||||
if (!rsp_out.empty()) {
|
||||
auto& rsp = rsp_out.front();
|
||||
uint32_t g = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
i = rsp.tag & (R-1);
|
||||
g = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg_num_reqs_;
|
||||
}
|
||||
DT(4, this->name() << " rsp" << o << ": " << rsp);
|
||||
uint32_t j = o * R + i;
|
||||
DT(4, this->name() << "-rsp" << o << ": " << rsp);
|
||||
uint32_t j = o * R + g;
|
||||
RspIn.at(j).push(rsp, 1);
|
||||
RspOut.at(o).pop();
|
||||
rsp_out.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
uint32_t g = (grants_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + g;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
|
@ -632,32 +738,193 @@ public:
|
|||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | i;
|
||||
req.tag = (req.tag << lg_num_reqs_) | g;
|
||||
}
|
||||
DT(4, this->name() << " req" << j << ": " << req);
|
||||
DT(4, this->name() << "-req" << j << ": " << req);
|
||||
ReqOut.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
this->update_grant(o, g);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
protected:
|
||||
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursors_.at(index) = grant + 1;
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg_num_reqs_;
|
||||
};
|
||||
|
||||
using MemArbiter = TxArbiter<MemReq, MemRsp>;
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
TxCrossBar(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, req_grants_(num_outputs, 0)
|
||||
, rsp_grants_(num_inputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs))
|
||||
, lg_num_rsps_(log2ceil(num_outputs))
|
||||
, addr_start_(addr_start)
|
||||
, addr_end_(num_outputs-1)
|
||||
, collisions_(0) {
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_inputs));
|
||||
assert(ispow2(num_outputs));
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& grant : req_grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
for (auto& grant : rsp_grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
uint32_t R = 1 << lg_num_reqs_;
|
||||
uint32_t T = 1 << lg_num_rsps_;
|
||||
|
||||
// process outgoing responses
|
||||
for (uint32_t i = 0; i < I; ++i) {
|
||||
int32_t output_idx = -1;
|
||||
for (uint32_t t = 0; t < T; ++t) {
|
||||
uint32_t o = (rsp_grants_.at(i) + t) & (T-1);
|
||||
if (o >= O)
|
||||
continue;
|
||||
auto& rsp_out = RspOut.at(o);
|
||||
if (!rsp_out.empty()) {
|
||||
auto& rsp = rsp_out.front();
|
||||
// skip if response is not going to current input
|
||||
uint32_t input_idx = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
input_idx = rsp.tag & (R-1);
|
||||
}
|
||||
if (input_idx != i)
|
||||
continue;
|
||||
if (output_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
output_idx = o;
|
||||
}
|
||||
}
|
||||
if (output_idx != -1) {
|
||||
auto& rsp_out = RspOut.at(output_idx);
|
||||
auto& rsp = rsp_out.front();
|
||||
uint32_t input_idx = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
input_idx = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg_num_reqs_;
|
||||
}
|
||||
DT(4, this->name() << "-rsp" << output_idx << ": " << rsp);
|
||||
RspIn.at(input_idx).push(rsp, 1);
|
||||
rsp_out.pop();
|
||||
this->update_rsp_grant(i, output_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
int32_t input_idx = -1;
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (req_grants_.at(o) + r) & (R-1);
|
||||
if (i >= I)
|
||||
continue;
|
||||
auto& req_in = ReqIn.at(i);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
// skip if request is not going to current output
|
||||
uint32_t output_idx = 0;
|
||||
if (O != 1) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_end_);
|
||||
}
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
if (input_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
input_idx = i;
|
||||
}
|
||||
}
|
||||
if (input_idx != -1) {
|
||||
auto& req_in = ReqIn.at(input_idx);
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | input_idx;
|
||||
}
|
||||
DT(4, this->name() << "-req" << input_idx << ": " << req);
|
||||
ReqOut.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_req_grant(o, input_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t collisions() const {
|
||||
return collisions_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void update_req_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
req_grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void update_rsp_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
rsp_grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> req_grants_;
|
||||
std::vector<uint32_t> rsp_grants_;
|
||||
uint32_t lg_num_reqs_;
|
||||
uint32_t lg_num_rsps_;
|
||||
uint32_t addr_start_;
|
||||
uint32_t addr_end_;
|
||||
uint64_t collisions_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -711,4 +978,6 @@ private:
|
|||
uint32_t delay_;
|
||||
};
|
||||
|
||||
using MemArbiter = TxArbiter<MemReq, MemRsp>;
|
||||
using MemCrossBar = TxCrossBar<MemReq, MemRsp>;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue