cache_sim refactoring

This commit is contained in:
Blaise Tine 2023-04-08 18:39:04 -04:00
parent f547550ddd
commit 0c4c384e89
5 changed files with 231 additions and 211 deletions

View file

@ -66,13 +66,13 @@ CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-i
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g0.png -g0"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=1
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=1
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g2.png -g2"
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=2
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=2
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1" --perf=3
CONFIGS="-DEXT_TEX_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1 -z"
CONFIGS="-DEXT_TEX_ENABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-isoccer.png -rsoccer_ref_g1.png -g1"
CONFIGS="-DEXT_TEX_ENABLE -DNUM_TEX_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DTCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -rsoccer_ref_g1.png" --cores=4 --warps=1 --threads=2
@ -89,10 +89,10 @@ rop()
{
echo "begin render output tests..."
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=4
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=4
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DEXT_ROP_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DEXT_ROP_ENABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png"
CONFIGS="-DEXT_ROP_ENABLE -DOCACHE_NUM_BANKS=8" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=4
CONFIGS="-DEXT_ROP_ENABLE -DOCACHE_NUM_BANKS=8" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --perf=5
CONFIGS="-DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=simx --app=rop --args="-rwhitebox_128.png" --cores=4 --warps=1 --threads=2
CONFIGS="-DEXT_ROP_ENABLE -DNUM_ROP_UNITS=2 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=1 --warps=1 --threads=2
CONFIGS="-DEXT_ROP_ENABLE -DNUM_ROP_UNITS=1 -DL1_DISABLE -DSM_DISABLE -DOCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=rop --args="-rwhitebox_128.png" --cores=2 --warps=1 --threads=2
@ -107,10 +107,10 @@ raster()
{
echo "begin rasterizer tests..."
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=3
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=3
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=simx --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DEXT_RASTER_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DEXT_RASTER_ENABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_RASTER_ENABLE -DRCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=3
CONFIGS="-DEXT_RASTER_ENABLE -DRCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png" --perf=4
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_NUM_PES=2 -DL1_DISABLE -DSM_DISABLE -DRCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=raster --args="-ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=4" ./ci/blackbox.sh --driver=simx --app=raster --args="-k4 -ttriangle.cgltrace -rtriangle_ref_128.png"
CONFIGS="-DEXT_RASTER_ENABLE -DRASTER_TILE_LOGSIZE=6" ./ci/blackbox.sh --driver=simx --app=raster --args="-k6 -ttriangle.cgltrace -rtriangle_ref_128.png"

View file

@ -64,4 +64,4 @@ clean:
rm -rf $(PROJECT) *.o scope-defs.h $(RTL_DIR)/scope-defs.vh
ifeq ($(TARGET), opaesim)
$(MAKE) -C $(OPAESIM_DIR) clean
endif
endif

View file

@ -15,7 +15,7 @@
#include <VX_config.h>
#include <VX_types.h>
#include "vortex_afu.h"
#include <vortex_afu.h>
#ifdef SCOPE
#include "scope.h"

View file

@ -284,7 +284,7 @@ private:
#endif
}
void eval() {
void eval() {
device_->eval();
#ifdef VCD_OUTPUT
if (sim_trace_enabled()) {
@ -526,4 +526,4 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
impl_->read_mmio64(mmio_num, offset, value);
}
}

View file

@ -11,8 +11,8 @@ using namespace vortex;
struct params_t {
uint32_t sets_per_bank;
uint32_t blocks_per_set;
uint32_t words_per_block;
uint32_t lines_per_set;
uint32_t words_per_line;
uint32_t log2_num_inputs;
uint32_t word_select_addr_start;
@ -38,11 +38,11 @@ struct params_t {
this->log2_num_inputs = log2ceil(config.num_inputs);
this->words_per_block = 1 << offset_bits;
this->blocks_per_set = 1 << config.A;
this->words_per_line = 1 << offset_bits;
this->lines_per_set = 1 << config.A;
this->sets_per_bank = 1 << index_bits;
assert(config.ports_per_bank <= this->words_per_block);
assert(config.ports_per_bank <= this->words_per_line);
// Word select
this->word_select_addr_start = config.W;
@ -94,59 +94,82 @@ struct params_t {
}
};
struct block_t {
bool valid;
bool dirty;
struct line_t {
uint64_t tag;
uint32_t lru_ctr;
bool valid;
bool dirty;
void clear() {
valid = false;
dirty = false;
}
};
struct set_t {
std::vector<block_t> blocks;
set_t(uint32_t size) : blocks(size) {}
std::vector<line_t> lines;
set_t(uint32_t num_ways)
: lines(num_ways)
{}
void clear() {
for (auto& block : blocks) {
block.valid = false;
for (auto& line : lines) {
line.clear();
}
}
};
struct bank_req_port_t {
bool valid;
uint32_t req_id;
uint64_t req_tag;
bool valid;
void clear() {
valid = false;
}
};
struct bank_req_t {
bool valid;
bool write;
bool mshr_replay;
enum ReqType {
None = 0,
Fill = 1,
Replay = 2,
Core = 3
};
std::vector<bank_req_port_t> ports;
uint64_t tag;
uint32_t set_id;
uint32_t cid;
uint64_t uuid;
std::vector<bank_req_port_t> ports;
ReqType type;
bool write;
bank_req_t(uint32_t size)
: valid(false)
, write(false)
, mshr_replay(false)
, tag(0)
, set_id(0)
, cid(0)
, uuid(0)
, ports(size)
bank_req_t(uint32_t num_ports)
: ports(num_ports)
{}
void clear() {
for (auto& port : ports) {
port.clear();
}
type = ReqType::None;
}
};
struct mshr_entry_t : public bank_req_t {
uint32_t block_id;
struct mshr_entry_t {
bank_req_t bank_req;
uint32_t line_id;
mshr_entry_t(uint32_t size = 0)
: bank_req_t(size)
, block_id(0)
mshr_entry_t(uint32_t num_ports)
: bank_req(num_ports)
{}
void clear() {
bank_req.clear();
}
};
class MSHR {
@ -155,8 +178,8 @@ private:
uint32_t size_;
public:
MSHR(uint32_t size)
: entries_(size)
MSHR(uint32_t size, uint32_t num_ports)
: entries_(size, num_ports)
, size_(0)
{}
@ -168,26 +191,23 @@ public:
return (size_ == entries_.size());
}
int lookup(const bank_req_t& bank_req) {
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
auto& entry = entries_.at(i);
if (entry.valid
&& entry.set_id == bank_req.set_id
&& entry.tag == bank_req.tag) {
return i;
bool lookup(const bank_req_t& bank_req) {
for (auto& entry : entries_) {;
if (entry.bank_req.type != bank_req_t::None
&& entry.bank_req.set_id == bank_req.set_id
&& entry.bank_req.tag == bank_req.tag) {
return true;
}
}
return -1;
return false;
}
int allocate(const bank_req_t& bank_req, uint32_t block_id) {
int allocate(const bank_req_t& bank_req, uint32_t line_id) {
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
auto& entry = entries_.at(i);
if (!entry.valid) {
*(bank_req_t*)&entry = bank_req;
entry.valid = true;
entry.mshr_replay = false;
entry.block_id = block_id;
if (entry.bank_req.type == bank_req_t::None) {
entry.bank_req = bank_req;
entry.line_id = line_id;
++size_;
return i;
}
@ -197,13 +217,13 @@ public:
mshr_entry_t& replay(uint32_t id) {
auto& root_entry = entries_.at(id);
assert(root_entry.valid);
// make all related mshr entries for replay
assert(root_entry.bank_req.type == bank_req_t::Core);
// mark all related mshr entries for replay
for (auto& entry : entries_) {
if (entry.valid
&& entry.set_id == root_entry.set_id
&& entry.tag == root_entry.tag) {
entry.mshr_replay = true;
if (entry.bank_req.type == bank_req_t::Core
&& entry.bank_req.set_id == root_entry.bank_req.set_id
&& entry.bank_req.tag == root_entry.bank_req.tag) {
entry.bank_req.type = bank_req_t::Replay;
}
}
return root_entry;
@ -211,9 +231,9 @@ public:
bool pop(bank_req_t* out) {
for (auto& entry : entries_) {
if (entry.valid && entry.mshr_replay) {
*out = entry;
entry.valid = false;
if (entry.bank_req.type == bank_req_t::Replay) {
*out = entry.bank_req;
entry.bank_req.type = bank_req_t::None;
--size_;
return true;
}
@ -223,29 +243,27 @@ public:
void clear() {
for (auto& entry : entries_) {
if (entry.valid && entry.mshr_replay) {
entry.valid = false;
}
entry.clear();
}
size_ = 0;
}
};
struct bank_t {
std::vector<set_t> sets;
MSHR mshr;
std::vector<set_t> sets;
MSHR mshr;
bank_t(const CacheSim::Config& config,
const params_t& params)
: sets(params.sets_per_bank, params.blocks_per_set)
, mshr(config.mshr_size)
: sets(params.sets_per_bank, params.lines_per_set)
, mshr(config.mshr_size, config.ports_per_bank)
{}
void clear() {
mshr.clear();
void clear() {
for (auto& set : sets) {
set.clear();
}
mshr.clear();
}
};
@ -261,7 +279,8 @@ private:
Switch<MemReq, MemRsp>::Ptr bypass_switch_;
std::vector<SimPort<MemReq>> mem_req_ports_;
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
uint32_t flush_cycles_;
std::vector<bank_req_t> pipeline_reqs_;
uint32_t init_cycles_;
PerfStats perf_stats_;
uint64_t pending_read_reqs_;
uint64_t pending_write_reqs_;
@ -275,6 +294,7 @@ public:
, banks_(config.num_banks, {config, params_})
, mem_req_ports_(config.num_banks, simobject)
, mem_rsp_ports_(config.num_banks, simobject)
, pipeline_reqs_(config.num_banks, config.ports_per_bank)
{
char sname[100];
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
@ -308,8 +328,8 @@ public:
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
// calculate tag flush cycles
flush_cycles_ = params_.sets_per_bank * params_.blocks_per_set;
// calculate cache initialization cycles
init_cycles_ = params_.sets_per_bank * params_.lines_per_set;
}
void reset() {
@ -320,74 +340,72 @@ public:
bank.clear();
}
perf_stats_ = PerfStats();
pending_read_reqs_ = 0;
pending_read_reqs_ = 0;
pending_write_reqs_ = 0;
pending_fill_reqs_ = 0;
pending_fill_reqs_ = 0;
}
void tick() {
if (config_.bypass)
return;
// wait on flush cycles
if (flush_cycles_ != 0) {
--flush_cycles_;
// wait on cache initialization cycles
if (init_cycles_ != 0) {
--init_cycles_;
return;
}
// per-bank pipeline request
std::vector<bank_req_t> pipeline_reqs(config_.num_banks, config_.ports_per_bank);
// calculate memory latency
perf_stats_.mem_latency += pending_fill_reqs_;
// handle bypasss responses
auto& bypass_port = bypass_switch_->RspIn.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.front();
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
bypass_port.pop();
}
// handle MSHR replay
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs.at(bank_id);
bank.mshr.pop(&pipeline_req);
}
// handle memory fills
std::vector<bool> pending_fill_req(config_.num_banks, false);
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
if (!mem_rsp_port.empty()) {
auto& mem_rsp = mem_rsp_port.front();
DT(3, simobject_->name() << "-dram-" << mem_rsp);
this->processMemoryFill(bank_id, mem_rsp.tag);
pending_fill_req.at(bank_id) = true;
mem_rsp_port.pop();
// handle cache bypasss responses
{
auto& bypass_port = bypass_switch_->RspIn.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.front();
this->processBypassResponse(mem_rsp);
bypass_port.pop();
}
}
// handle incoming core requests
// initialize pipeline request
for (auto& pipeline_req : pipeline_reqs_) {
pipeline_req.clear();
}
// schedule MSHR replay
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs_.at(bank_id);
bank.mshr.pop(&pipeline_req);
}
// schedule memory fill
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
if (mem_rsp_port.empty())
continue;
auto& pipeline_req = pipeline_reqs_.at(bank_id);
if (pipeline_req.type != bank_req_t::None)
continue;
auto& mem_rsp = mem_rsp_port.front();
DT(3, simobject_->name() << "-dram-" << mem_rsp);
pipeline_req.type = bank_req_t::Fill;
pipeline_req.tag = mem_rsp.tag;
mem_rsp_port.pop();
}
// schedule core requests
for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
if (core_req_port.empty())
continue;
auto& core_req = core_req_port.front();
// check cache bypassing
if (core_req.addr_type == AddrType::IO) {
DT(3, simobject_->name() << "-core-" << core_req);
if (core_req.type == AddrType::IO) {
// send bypass request
this->processBypassRequest(core_req, req_id);
// remove request
core_req_port.pop();
continue;
@ -397,43 +415,20 @@ public:
auto set_id = params_.addr_set_id(core_req.addr);
auto tag = params_.addr_tag(core_req.addr);
auto port_id = req_id % config_.ports_per_bank;
// create bank request
bank_req_t bank_req(config_.ports_per_bank);
bank_req.valid = true;
bank_req.write = core_req.write;
bank_req.mshr_replay = false;
bank_req.tag = tag;
bank_req.set_id = set_id;
bank_req.cid = core_req.cid;
bank_req.uuid = core_req.uuid;
bank_req.ports.at(port_id) = {true, req_id, core_req.tag};
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs.at(bank_id);
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs_.at(bank_id);
// check pending MSHR replay
if (pipeline_req.valid
&& pipeline_req.mshr_replay) {
// stall
continue;
}
// check pending fill request
if (pending_fill_req.at(bank_id)) {
// stall
continue;
}
// check MSHR capacity if read or writeback
// check MSHR capacity
if ((!core_req.write || !config_.write_through)
&& bank.mshr.full()) {
++perf_stats_.mshr_stalls;
++perf_stats_.bank_stalls;
continue;
}
}
// check bank conflicts
if (pipeline_req.valid) {
if (pipeline_req.type == bank_req_t::Core) {
// check port conflict
if (pipeline_req.write != core_req.write
|| pipeline_req.set_id != set_id
@ -442,11 +437,23 @@ public:
++perf_stats_.bank_stalls;
continue;
}
// update pending request infos
pipeline_req.ports.at(port_id) = bank_req.ports.at(port_id);
} else {
// extend request ports
pipeline_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
} else if (pipeline_req.type == bank_req_t::None) {
// schedule new request
pipeline_req = bank_req;
bank_req_t bank_req(config_.ports_per_bank);
bank_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
bank_req.tag = tag;
bank_req.set_id = set_id;
bank_req.cid = core_req.cid;
bank_req.uuid = core_req.uuid;
bank_req.type = bank_req_t::Core;
bank_req.write = core_req.write;
pipeline_req = bank_req;
} else {
// bank in use
++perf_stats_.bank_stalls;
continue;
}
if (core_req.write)
@ -461,7 +468,7 @@ public:
}
// process active request
this->processBankRequest(pipeline_reqs);
this->processBankRequests();
}
const PerfStats& perf_stats() const {
@ -470,7 +477,17 @@ public:
private:
void processBypassResponse(const MemRsp& mem_rsp) {
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
DT(3, simobject_->name() << "-core-" << core_rsp);
}
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
DT(3, simobject_->name() << "-core-" << core_req);
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
@ -485,27 +502,25 @@ private:
}
}
void processMemoryFill(uint32_t bank_id, uint32_t mshr_id) {
// update block
auto& bank = banks_.at(bank_id);
auto& entry = bank.mshr.replay(mshr_id);
auto& set = bank.sets.at(entry.set_id);
auto& block = set.blocks.at(entry.block_id);
block.valid = true;
block.tag = entry.tag;
--pending_fill_reqs_;
}
void processBankRequest(const std::vector<bank_req_t>& pipeline_reqs) {
void processBankRequests() {
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& pipeline_req = pipeline_reqs.at(bank_id);
if (!pipeline_req.valid)
continue;
auto& bank = banks_.at(bank_id);
auto& set = bank.sets.at(pipeline_req.set_id);
if (pipeline_req.mshr_replay) {
auto pipeline_req = pipeline_reqs_.at(bank_id);
switch (pipeline_req.type) {
case bank_req_t::None:
break;
case bank_req_t::Fill: {
// update cache line
auto& bank = banks_.at(bank_id);
auto& entry = bank.mshr.replay(pipeline_req.tag);
auto& set = bank.sets.at(entry.bank_req.set_id);
auto& line = set.lines.at(entry.line_id);
line.valid = true;
line.tag = entry.bank_req.tag;
--pending_fill_reqs_;
} break;
case bank_req_t::Replay: {
// send core response
if (!pipeline_req.write || config_.write_reponse) {
for (auto& info : pipeline_req.ports) {
@ -516,30 +531,32 @@ private:
DT(3, simobject_->name() << "-core-" << core_rsp);
}
}
} else {
} break;
case bank_req_t::Core: {
bool hit = false;
bool found_free_block = false;
uint32_t hit_block_id = 0;
uint32_t repl_block_id = 0;
bool found_free_line = false;
uint32_t hit_line_id = 0;
uint32_t repl_line_id = 0;
uint32_t max_cnt = 0;
for (uint32_t i = 0, n = set.blocks.size(); i < n; ++i) {
auto& block = set.blocks.at(i);
if (block.valid) {
if (block.tag == pipeline_req.tag) {
block.lru_ctr = 0;
hit_block_id = i;
auto& set = bank.sets.at(pipeline_req.set_id);
for (uint32_t i = 0, n = set.lines.size(); i < n; ++i) {
auto& line = set.lines.at(i);
if (line.valid) {
if (line.tag == pipeline_req.tag) {
line.lru_ctr = 0;
hit_line_id = i;
hit = true;
} else {
++block.lru_ctr;
++line.lru_ctr;
}
if (max_cnt < block.lru_ctr) {
max_cnt = block.lru_ctr;
repl_block_id = i;
if (max_cnt < line.lru_ctr) {
max_cnt = line.lru_ctr;
repl_line_id = i;
}
} else {
found_free_block = true;
repl_block_id = i;
found_free_line = true;
repl_line_id = i;
}
}
@ -549,19 +566,19 @@ private:
//
if (pipeline_req.write) {
// handle write hit
auto& hit_block = set.blocks.at(hit_block_id);
auto& hit_line = set.lines.at(hit_line_id);
if (config_.write_through) {
// forward write request to memory
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag);
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_line.tag);
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
DT(3, simobject_->name() << "-dram-" << mem_req);
} else {
// mark block as dirty
hit_block.dirty = true;
// mark line as dirty
hit_line.dirty = true;
}
}
// send core response
@ -583,12 +600,12 @@ private:
else
++perf_stats_.read_misses;
if (!found_free_block && !config_.write_through) {
// write back dirty block
auto& repl_block = set.blocks.at(repl_block_id);
if (repl_block.dirty) {
if (!found_free_line && !config_.write_through) {
// write back dirty line
auto& repl_line = set.lines.at(repl_line_id);
if (repl_line.dirty) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag);
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_line.tag);
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req_ports_.at(bank_id).send(mem_req, 1);
@ -620,13 +637,13 @@ private:
}
} else {
// MSHR lookup
int pending = bank.mshr.lookup(pipeline_req);
auto mshr_pending = bank.mshr.lookup(pipeline_req);
// allocate MSHR
int mshr_id = bank.mshr.allocate(pipeline_req, repl_block_id);
auto mshr_id = bank.mshr.allocate(pipeline_req, repl_line_id);
// send fill request
if (pending == -1) {
if (!mshr_pending) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
mem_req.write = false;
@ -639,8 +656,11 @@ private:
}
}
}
} break;
}
}
// calculate memory latency
perf_stats_.mem_latency += pending_fill_reqs_;
}
};