fixes to SimX's multiports memory support

This commit is contained in:
tinebp 2024-12-02 17:51:42 -08:00
parent 3e4bbfc9f0
commit 3b454efd56
17 changed files with 280 additions and 234 deletions

View file

@ -14,8 +14,6 @@
`ifndef VX_CONFIG_VH
`define VX_CONFIG_VH
`ifndef MIN
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`endif
@ -170,8 +168,8 @@
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
`ifndef PLATFORM_MEMORY_BANKS
`define PLATFORM_MEMORY_BANKS 1
`endif
`ifdef XLEN_64
@ -193,7 +191,7 @@
`endif
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
`endif
@ -218,7 +216,7 @@
`endif
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
`endif
@ -303,13 +301,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV32 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (2)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (4)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (1024)
`endif
`ifndef PT_SIZE_LIMIT
@ -319,13 +317,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV39 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (3)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (8)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (512)
`endif
`ifndef PT_SIZE_LIMIT
@ -604,7 +602,7 @@
// Number of Banks
`ifndef DCACHE_NUM_BANKS
`define DCACHE_NUM_BANKS `MIN(`NUM_LSU_LANES, 4)
`define DCACHE_NUM_BANKS `MIN(DCACHE_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -647,6 +645,15 @@
`define DCACHE_REPL_POLICY 1
`endif
// Number of Memory Ports
`ifndef L1_MEM_PORTS
`ifdef L1_DISABLE
`define L1_MEM_PORTS `L2_MEM_PORTS
`else
`define L1_MEM_PORTS `MIN(`L2_MEM_PORTS, `DCACHE_NUM_BANKS)
`endif
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE
@ -674,7 +681,7 @@
// Number of Banks
`ifndef L2_NUM_BANKS
`define L2_NUM_BANKS `MIN(4, `NUM_SOCKETS)
`define L2_NUM_BANKS `MIN(L2_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -717,6 +724,15 @@
`define L2_REPL_POLICY 1
`endif
// Number of Memory Ports
`ifndef L2_MEM_PORTS
`ifdef L2_ENABLE
`define L2_MEM_PORTS `MIN(`L3_MEM_PORTS, `L2_NUM_BANKS)
`else
`define L2_MEM_PORTS `L3_MEM_PORTS
`endif
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size
@ -726,7 +742,7 @@
// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`define L3_NUM_BANKS `MIN(L3_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -769,9 +785,13 @@
`define L3_REPL_POLICY 1
`endif
// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
// Number of Memory Ports
`ifndef L3_MEM_PORTS
`ifdef L3_ENABLE
`define L3_MEM_PORTS `MIN(`PLATFORM_MEMORY_BANKS, `L3_NUM_BANKS)
`else
`define L3_MEM_PORTS `PLATFORM_MEMORY_BANKS
`endif
`endif
// ISA Extensions /////////////////////////////////////////////////////////////

View file

@ -78,10 +78,10 @@ public:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
_value = PLATFORM_MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;

View file

@ -65,7 +65,7 @@ public:
~vx_device() {
#ifdef VM_ENABLE
global_mem_.release(PAGE_TABLE_BASE_ADDR);
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
delete virtual_mem_;
delete page_table_mem_;
@ -113,10 +113,10 @@ public:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
_value = PLATFORM_MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
@ -164,7 +164,7 @@ public:
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
return 0;
// Now all conditions are not met. Return true because the address needs translation
// Now all conditions are not met. Return true because the address needs translation
return 1;
}
@ -277,7 +277,7 @@ public:
#ifdef VM_ENABLE
uint64_t pAddr = page_table_walk(dest_addr);
// uint64_t pAddr;
// try {
// try {
// pAddr = page_table_walk(dest_addr);
// } catch ( Page_Fault_Exception ) {
// // HW: place holder
@ -466,18 +466,18 @@ public:
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
return err;
});
if (virtual_mem_ == nullptr) {
// virtual_mem_ does not interfere with physical mem, so no need to free space
return 1;
}
if (VM_ADDR_MODE == BARE)
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
else
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
return 0;
}
@ -604,7 +604,7 @@ public:
}
else
{
// Leaf node found.
// Leaf node found.
// Check RWX permissions according to access type.
if (pte.r == 0)
{

View file

@ -152,7 +152,9 @@ public:
// start
device_->reset = 0;
device_->mem_req_ready = 1;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
device_->mem_req_ready[b] = 1;
}
// wait on device to go busy
while (!device_->busy) {
@ -186,11 +188,14 @@ private:
this->dcr_bus_reset();
print_bufs_.clear();
pending_mem_reqs_.clear();
{
for (auto& reqs : pending_mem_reqs_) {
reqs.clear();
}
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
std::swap(dram_queue_[b], empty);
}
device_->reset = 1;
@ -217,17 +222,19 @@ private:
dram_sim_.tick();
if (!dram_queue_.empty()) {
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
if (!dram_queue_[b].empty()) {
auto mem_req = dram_queue_[b].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queue_[b].pop();
}
}, mem_req)) {
dram_queue_.pop();
}
}
@ -247,101 +254,107 @@ private:
}
void mem_bus_reset() {
device_->mem_req_ready = 0;
device_->mem_rsp_valid = 0;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
device_->mem_req_ready[b] = 0;
device_->mem_rsp_valid[b] = 0;
}
}
void mem_bus_eval(bool clk) {
if (!clk) {
mem_rd_rsp_ready_ = device_->mem_rsp_ready;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
mem_rd_rsp_ready_[b] = device_->mem_rsp_ready[b];
}
return;
}
// process memory read responses
if (device_->mem_rsp_valid && mem_rd_rsp_ready_) {
device_->mem_rsp_valid = 0;
}
if (!device_->mem_rsp_valid) {
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready) {
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->data[i]);
}
printf("\n");
*/
device_->mem_rsp_valid = 1;
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag = mem_rsp->tag;
pending_mem_reqs_.erase(mem_rsp_it);
delete mem_rsp;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// process memory read responses
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
device_->mem_rsp_valid[b] = 0;
}
}
// process memory requests
if (device_->mem_req_valid && device_->mem_req_ready) {
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
if (device_->mem_req_rw) {
auto byteen = device_->mem_req_byteen;
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
if (!device_->mem_rsp_valid[b]) {
if (!pending_mem_reqs_[b].empty()
&& (*pending_mem_reqs_[b].begin())->ready) {
auto mem_rsp_it = pending_mem_reqs_[b].begin();
auto mem_rsp = *mem_rsp_it;
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
printf("%02x", mem_rsp->data[i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
device_->mem_rsp_valid[b] = 1;
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[b]), mem_rsp->data.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag[b] = mem_rsp->tag;
pending_mem_reqs_[b].erase(mem_rsp_it);
delete mem_rsp;
}
}
// process memory requests
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
uint64_t byte_addr = (device_->mem_req_addr[b] * MEM_BLOCK_SIZE);
if (device_->mem_req_rw[b]) {
auto byteen = device_->mem_req_byteen[b];
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[b]);
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag[b];
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
// send dram request
dram_queue_[b].push(mem_req);
}
} else {
// process reads
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag;
mem_req->tag = device_->mem_req_tag[b];
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_[b].emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request
dram_queue_.push(mem_req);
dram_queue_[b].push(mem_req);
}
} else {
// process reads
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag;
mem_req->addr = byte_addr;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_.emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request
dram_queue_.push(mem_req);
}
}
}
@ -369,21 +382,21 @@ private:
std::unordered_map<int, std::stringstream> print_bufs_;
std::list<mem_req_t*> pending_mem_reqs_;
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
std::queue<mem_req_t*> dram_queue_;
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_BANKS];
std::array<bool, PLATFORM_MEMORY_BANKS> mem_rd_rsp_ready_;
DramSim dram_sim_;
VVortex* device_;
RAM* ram_;
#ifdef VCD_OUTPUT
VerilatedVcdC *tfp_;
#endif
bool mem_rd_rsp_ready_;
RAM* ram_;
};
///////////////////////////////////////////////////////////////////////////////

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,12 +24,12 @@ public:
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_inputs,
uint32_t num_caches,
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_inputs,
uint32_t num_caches,
uint32_t num_requests,
const CacheSim::Config& cache_config)
const CacheSim::Config& cache_config)
: SimObject(ctx, name)
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(num_requests, this))
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(num_requests, this))
@ -44,21 +44,21 @@ public:
}
char sname[100];
std::vector<MemSwitch::Ptr> input_arbs(num_inputs);
std::vector<MemArbiter::Ptr> input_arbs(num_inputs);
for (uint32_t j = 0; j < num_inputs; ++j) {
snprintf(sname, 100, "%s-input-arb%d", name, j);
input_arbs.at(j) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
input_arbs.at(j) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
for (uint32_t i = 0; i < num_requests; ++i) {
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(j)->ReqIn.at(i));
input_arbs.at(j)->RspIn.at(i).bind(&this->CoreRspPorts.at(j).at(i));
}
}
std::vector<MemSwitch::Ptr> mem_arbs(cache_config.num_inputs);
std::vector<MemArbiter::Ptr> mem_arbs(cache_config.num_inputs);
for (uint32_t i = 0; i < cache_config.num_inputs; ++i) {
snprintf(sname, 100, "%s-mem-arb%d", name, i);
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
for (uint32_t j = 0; j < num_inputs; ++j) {
input_arbs.at(j)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(j));
mem_arbs.at(i)->RspIn.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
@ -66,7 +66,7 @@ public:
}
snprintf(sname, 100, "%s-cache-arb", name);
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
auto cache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
for (uint32_t i = 0; i < num_caches; ++i) {
snprintf(sname, 100, "%s-cache%d", name, i);
@ -88,14 +88,14 @@ public:
~CacheCluster() {}
void reset() {}
void tick() {}
CacheSim::PerfStats perf_stats() const {
CacheSim::PerfStats perf;
for (auto cache : caches_) {
perf += cache->perf_stats();
}
}
return perf;
}

View file

@ -305,8 +305,8 @@ private:
Config config_;
params_t params_;
std::vector<bank_t> banks_;
MemSwitch::Ptr bank_switch_;
MemSwitch::Ptr bypass_switch_;
MemArbiter::Ptr bank_arb_;
MemArbiter::Ptr bypass_arb_;
std::vector<SimPort<MemReq>> mem_req_ports_;
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
std::vector<bank_req_t> pipeline_reqs_;
@ -330,33 +330,33 @@ public:
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
if (config_.bypass) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
simobject->CoreReqPorts.at(i).bind(&bypass_arb_->ReqIn.at(i));
bypass_arb_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
}
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
return;
}
if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
} else {
// TODO: Change this into a crossbar
@ -364,45 +364,45 @@ public:
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
bypass_arb_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_arb_->RspOut.at(i));
}
} else {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
}
if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
bank_arb_->ReqOut.at(i % (1 << config.B)).bind(&bypass_arb_->ReqIn.at(i));
bypass_arb_->RspIn.at(i).bind(&bank_arb_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
}
@ -435,7 +435,7 @@ public:
// handle cache bypass responses
{
auto& bypass_port = bypass_switch_->RspIn.at(1);
auto& bypass_port = bypass_arb_->RspIn.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.front();
this->processBypassResponse(mem_rsp);
@ -568,7 +568,7 @@ private:
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
bypass_arb_->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
}
@ -743,8 +743,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, MemReqPorts(config.mem_ports, this)
, MemRspPorts(config.mem_ports, this)
, impl_(new Impl(this, config))
{}

View file

@ -30,6 +30,7 @@ public:
uint8_t addr_width; // word address bits
uint8_t ports_per_bank; // number of ports per bank
uint8_t num_inputs; // number of inputs
uint8_t mem_ports; // memory ports
bool write_back; // is write-back
bool write_reponse; // enable write response
uint16_t mshr_size; // MSHR buffer size

View file

@ -36,10 +36,10 @@ Cluster::Cluster(const SimContext& ctx,
// create sockets
snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
auto icache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
auto icache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
auto dcache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
auto dcache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
@ -48,11 +48,11 @@ Cluster::Cluster(const SimContext& ctx,
arch,
dcrs);
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
socket->icache_mem_req_port.bind(&icache_arb->ReqIn.at(i));
icache_arb->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
socket->dcache_mem_req_port.bind(&dcache_switch->ReqIn.at(i));
dcache_switch->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
socket->dcache_mem_req_port.bind(&dcache_arb->ReqIn.at(i));
dcache_arb->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
sockets_.at(i) = socket;
}
@ -69,7 +69,8 @@ Cluster::Cluster(const SimContext& ctx,
log2ceil(L2_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
2, // request size
L2_NUM_REQS, // request size
L2_MEM_PORTS, // memory ports
L2_WRITEBACK, // write-back
false, // write response
L2_MSHR_SIZE, // mshr size
@ -79,11 +80,11 @@ Cluster::Cluster(const SimContext& ctx,
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
icache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_arb->RspOut.at(0));
dcache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcache_switch->RspOut.at(0));
dcache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcache_arb->RspOut.at(0));
}
Cluster::~Cluster() {

View file

@ -27,10 +27,15 @@ inline constexpr int LSU_WORD_SIZE = (XLEN / 8);
inline constexpr int LSU_CHANNELS = NUM_LSU_LANES;
inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS);
// The dcache uses coalesced memory blocks
inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE;
inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE);
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
inline constexpr int L2_NUM_REQS = 2;
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS;
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;

View file

@ -76,7 +76,7 @@ Core::Core(const SimContext& ctx,
// create lsu demux
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1);
lsu_demux_.at(i) = LocalMemSwitch::Create(sname, 1);
}
// create lsu dcache adapter
@ -130,7 +130,7 @@ Core::Core(const SimContext& ctx,
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES);
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES);
dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES);
// initialize execute units
func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
@ -141,7 +141,7 @@ Core::Core(const SimContext& ctx,
// bind commit arbiters
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
}

View file

@ -34,7 +34,7 @@ class Socket;
class Arch;
class DCRS;
using TraceSwitch = Mux<instr_trace_t*>;
using TraceArbiter = Arbiter<instr_trace_t*>;
class Core : public SimObject<Core> {
public:
@ -154,7 +154,7 @@ private:
std::vector<Dispatcher::Ptr> dispatchers_;
std::vector<FuncUnit::Ptr> func_units_;
LocalMem::Ptr local_mem_;
std::vector<LocalMemDemux::Ptr> lsu_demux_;
std::vector<LocalMemSwitch::Ptr> lsu_demux_;
std::vector<MemCoalescer::Ptr> mem_coalescers_;
std::vector<LsuMemAdapter::Ptr> lsu_dcache_adapter_;
std::vector<LsuMemAdapter::Ptr> lsu_lmem_adapter_;
@ -169,7 +169,7 @@ private:
PerfStats perf_stats_;
std::vector<TraceSwitch::Ptr> commit_arbs_;
std::vector<TraceArbiter::Ptr> commit_arbs_;
uint32_t commit_exe_;
uint32_t ibuffer_idx_;

View file

@ -59,7 +59,7 @@ public:
dram_sim_.tick();
uint32_t counter = 0;
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
for (uint32_t i = 0; i < config_.channels; ++i) {
if (simobject_->MemReqPorts.at(i).empty())
continue;
@ -107,8 +107,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, MemReqPorts(config.channels, this)
, MemRspPorts(config.channels, this)
, impl_(new Impl(this, config))
{}

View file

@ -24,7 +24,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
// create memory simulator
memsim_ = MemSim::Create("dram", MemSim::Config{
MEMORY_BANKS,
PLATFORM_MEMORY_BANKS,
uint32_t(arch.num_cores()) * arch.num_clusters()
});
@ -38,7 +38,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
log2ceil(L3_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
uint8_t(arch.num_clusters()), // request size
L3_NUM_REQS, // request size
L3_MEM_PORTS, // memory ports
L3_WRITEBACK, // write-back
false, // write response
L3_MSHR_SIZE, // mshr size
@ -47,7 +48,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
);
// connect L3 memory ports
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
}
@ -61,11 +62,11 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
}
// set up memory profiling
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_pending_reads_ += !req.write;
});
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){

View file

@ -44,6 +44,7 @@ Socket::Socket(const SimContext& ctx,
XLEN, // address bits
1, // number of ports
1, // number of inputs
1, // memory ports
false, // write-back
false, // write response
(uint8_t)arch.num_warps(), // mshr size
@ -64,6 +65,7 @@ Socket::Socket(const SimContext& ctx,
XLEN, // address bits
1, // number of ports
DCACHE_NUM_REQS, // number of inputs
L1_MEM_PORTS, // memory ports
DCACHE_WRITEBACK, // write-back
false, // write response
DCACHE_MSHR_SIZE, // mshr size

View file

@ -15,11 +15,11 @@
using namespace vortex;
LocalMemDemux::LocalMemDemux(
LocalMemSwitch::LocalMemSwitch(
const SimContext& ctx,
const char* name,
uint32_t delay
) : SimObject<LocalMemDemux>(ctx, name)
) : SimObject<LocalMemSwitch>(ctx, name)
, ReqIn(this)
, RspIn(this)
, ReqLmem(this)
@ -29,9 +29,9 @@ LocalMemDemux::LocalMemDemux(
, delay_(delay)
{}
void LocalMemDemux::reset() {}
void LocalMemSwitch::reset() {}
void LocalMemDemux::tick() {
void LocalMemSwitch::tick() {
// process incoming responses
if (!RspLmem.empty()) {
auto& out_rsp = RspLmem.front();

View file

@ -466,19 +466,19 @@ private:
///////////////////////////////////////////////////////////////////////////////
template <typename Type>
class Mux : public SimObject<Mux<Type>> {
class Arbiter : public SimObject<Arbiter<Type>> {
public:
std::vector<SimPort<Type>> Inputs;
std::vector<SimPort<Type>> Outputs;
Mux(
Arbiter(
const SimContext& ctx,
const char* name,
ArbiterType type,
uint32_t num_inputs,
uint32_t num_outputs = 1,
uint32_t delay = 1
) : SimObject<Mux<Type>>(ctx, name)
) : SimObject<Arbiter<Type>>(ctx, name)
, Inputs(num_inputs, this)
, Outputs(num_outputs, this)
, type_(type)
@ -551,7 +551,7 @@ private:
///////////////////////////////////////////////////////////////////////////////
template <typename Req, typename Rsp>
class Switch : public SimObject<Switch<Req, Rsp>> {
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
public:
std::vector<SimPort<Req>> ReqIn;
std::vector<SimPort<Rsp>> RspIn;
@ -559,7 +559,7 @@ public:
std::vector<SimPort<Req>> ReqOut;
std::vector<SimPort<Rsp>> RspOut;
Switch(
TxArbiter(
const SimContext& ctx,
const char* name,
ArbiterType type,
@ -567,7 +567,7 @@ public:
uint32_t num_outputs = 1,
uint32_t delay = 1
)
: SimObject<Switch<Req, Rsp>>(ctx, name)
: SimObject<TxArbiter<Req, Rsp>>(ctx, name)
, ReqIn(num_inputs, this)
, RspIn(num_inputs, this)
, ReqOut(num_outputs, this)
@ -657,11 +657,11 @@ private:
uint32_t lg_num_reqs_;
};
using MemSwitch = Switch<MemReq, MemRsp>;
using MemArbiter = TxArbiter<MemReq, MemRsp>;
///////////////////////////////////////////////////////////////////////////////
class LocalMemDemux : public SimObject<LocalMemDemux> {
class LocalMemSwitch : public SimObject<LocalMemSwitch> {
public:
SimPort<LsuReq> ReqIn;
SimPort<LsuRsp> RspIn;
@ -672,7 +672,7 @@ public:
SimPort<LsuReq> ReqDC;
SimPort<LsuRsp> RspDC;
LocalMemDemux(
LocalMemSwitch(
const SimContext& ctx,
const char* name,
uint32_t delay

View file

@ -142,8 +142,8 @@ public:
if (future_.valid()) {
future_.wait();
}
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
delete mem_alloc_[i];
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
delete mem_alloc_[b];
}
if (ram_) {
delete ram_;
@ -187,8 +187,8 @@ public:
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
// initialize memory allocator
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
}
// reset the device
@ -257,8 +257,9 @@ public:
//printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset);
device_->s_axi_ctrl_awvalid = 1;
device_->s_axi_ctrl_awaddr = offset;
while (!device_->s_axi_ctrl_awready)
while (!device_->s_axi_ctrl_awready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_awvalid = 0;
@ -267,8 +268,9 @@ public:
device_->s_axi_ctrl_wvalid = 1;
device_->s_axi_ctrl_wdata = value;
device_->s_axi_ctrl_wstrb = 0xf;
while (!device_->s_axi_ctrl_wready)
while (!device_->s_axi_ctrl_wready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_wvalid = 0;
@ -290,8 +292,9 @@ public:
//printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset);
device_->s_axi_ctrl_arvalid = 1;
device_->s_axi_ctrl_araddr = offset;
while (!device_->s_axi_ctrl_arready)
while (!device_->s_axi_ctrl_arready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_arvalid = 0;
@ -318,9 +321,9 @@ private:
reqs.clear();
}
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
std::queue<mem_req_t*> empty;
std::swap(dram_queues_[i], empty);
std::swap(dram_queues_[b], empty);
}
device_->ap_rst_n = 0;
@ -335,10 +338,10 @@ private:
device_->ap_rst_n = 1;
// this AXI device is always ready to accept new requests
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
*m_axi_mem_[i].arready = 1;
*m_axi_mem_[i].awready = 1;
*m_axi_mem_[i].wready = 1;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
*m_axi_mem_[b].arready = 1;
*m_axi_mem_[b].awready = 1;
*m_axi_mem_[b].wready = 1;
}
}
@ -355,10 +358,10 @@ private:
dram_sim_.tick();
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
if (!dram_queues_[i].empty()) {
auto mem_req = dram_queues_[i].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
if (!dram_queues_[b].empty()) {
auto mem_req = dram_queues_[b].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
@ -366,7 +369,7 @@ private:
orig_req->ready = true;
}
}, mem_req)) {
dram_queues_[i].pop();
dram_queues_[b].pop();
}
}
}