vortex/sim/simx/mem_coalescer.cpp
tinebp 704f525fd6 memory mem_coalescer miss perf counter
RTL perf counters refactoring
2024-12-26 08:00:36 -08:00

163 lines
No EOL
4.2 KiB
C++

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mem_coalescer.h"
using namespace vortex;
// Constructs a memory coalescer.
//
// Parameters:
//   ctx         - simulation context, forwarded to the SimObject base.
//   name        - instance name, forwarded to the SimObject base.
//   input_size  - number of lanes on the ReqIn/RspIn side.
//   output_size - number of lanes on the ReqOut/RspOut side. Must be non-zero
//                 and evenly divide input_size: tick() maps output lane `o`
//                 to input lanes [o * ratio, (o + 1) * ratio), so any
//                 remainder lanes would never be serviced.
//   line_size   - coalescing granularity. Must be a power of two because
//                 tick() builds its alignment mask as ~(line_size - 1).
//   queue_size  - size argument used to construct the pending read-request
//                 table.
//   delay       - delay passed to ReqOut.push() for every issued request.
MemCoalescer::MemCoalescer(
  const SimContext& ctx,
  const char* name,
  uint32_t input_size,
  uint32_t output_size,
  uint32_t line_size,
  uint32_t queue_size,
  uint32_t delay
) : SimObject<MemCoalescer>(ctx, name)
  , ReqIn(this)
  , RspIn(this)
  , ReqOut(this)
  , RspOut(this)
  , input_size_(input_size)
  , output_size_(output_size)
  // guard the division so a bad configuration reaches the asserts below
  // instead of faulting inside the initializer list
  , output_ratio_((output_size != 0) ? (input_size / output_size) : 0u)
  , pending_rd_reqs_(queue_size)
  , sent_mask_(input_size)
  , line_size_(line_size)
  , delay_(delay)
{
  assert(output_size != 0 && "MemCoalescer: output_size must be non-zero");
  assert((input_size % output_size) == 0
      && "MemCoalescer: input_size must be a multiple of output_size");
  assert(line_size != 0 && (line_size & (line_size - 1)) == 0
      && "MemCoalescer: line_size must be a power of two");
}
void MemCoalescer::reset() {
sent_mask_.reset();
}
void MemCoalescer::tick() {
// process outgoing responses
if (!RspOut.empty()) {
auto& out_rsp = RspOut.front();
DT(4, this->name() << "-mem-rsp: " << out_rsp);
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
BitVector<> rsp_mask(input_size_);
for (uint32_t o = 0; o < output_size_; ++o) {
if (!out_rsp.mask.test(o))
continue;
for (uint32_t r = 0; r < output_ratio_; ++r) {
uint32_t i = o * output_ratio_ + r;
if (entry.mask.test(i))
rsp_mask.set(i);
}
}
// build memory response
LsuRsp in_rsp(input_size_);
in_rsp.mask = rsp_mask;
in_rsp.tag = entry.tag;
in_rsp.cid = out_rsp.cid;
in_rsp.uuid = out_rsp.uuid;
// send memory response
RspIn.push(in_rsp, 1);
// track remaining responses
assert(!entry.mask.none());
entry.mask &= ~rsp_mask;
if (entry.mask.none()) {
// whole response received, release tag
pending_rd_reqs_.release(out_rsp.tag);
}
RspOut.pop();
}
// process incoming requests
if (ReqIn.empty())
return;
auto& in_req = ReqIn.front();
assert(in_req.mask.size() == input_size_);
assert(!in_req.mask.none());
// ensure we can allocate a response tag
if (pending_rd_reqs_.full()) {
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
return;
}
uint64_t addr_mask = ~uint64_t(line_size_-1);
BitVector<> out_mask(output_size_);
std::vector<uint64_t> out_addrs(output_size_);
BitVector<> cur_mask(input_size_);
for (uint32_t o = 0; o < output_size_; ++o) {
for (uint32_t r = 0; r < output_ratio_; ++r) {
uint32_t i = o * output_ratio_ + r;
if (sent_mask_.test(i) || !in_req.mask.test(i))
continue;
uint64_t seed_addr = in_req.addrs.at(i) & addr_mask;
cur_mask.set(i);
// coalesce matching requests
for (uint32_t s = r + 1; s < output_ratio_; ++s) {
uint32_t j = o * output_ratio_ + s;
if (sent_mask_.test(j) || !in_req.mask.test(j))
continue;
uint64_t match_addr = in_req.addrs.at(j) & addr_mask;
if (match_addr == seed_addr) {
cur_mask.set(j);
}
}
out_mask.set(o);
out_addrs.at(o) = seed_addr;
break;
}
}
assert(!out_mask.none());
uint32_t tag = 0;
if (!in_req.write) {
// allocate a response tag for read requests
tag = pending_rd_reqs_.allocate(pending_req_t{in_req.tag, cur_mask});
}
// build memory request
LsuReq out_req{output_size_};
out_req.mask = out_mask;
out_req.tag = tag;
out_req.write = in_req.write;
out_req.addrs = out_addrs;
out_req.cid = in_req.cid;
out_req.uuid = in_req.uuid;
// send memory request
ReqOut.push(out_req, delay_);
DT(4, this->name() << "-mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
// track partial responses
perf_stats_.misses += (cur_mask.count() != in_req.mask.count());
// update sent mask
sent_mask_ |= cur_mask;
if (sent_mask_ == in_req.mask) {
ReqIn.pop();
sent_mask_.reset();
}
}
// Returns a read-only reference to the performance counters accumulated by
// this coalescer (tick() increments the `misses` counter).
const MemCoalescer::PerfStats& MemCoalescer::perf_stats() const {
  return this->perf_stats_;
}