mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fixes to SimX's multiports memory support
This commit is contained in:
parent
3e4bbfc9f0
commit
3b454efd56
17 changed files with 280 additions and 234 deletions
|
@ -14,8 +14,6 @@
|
|||
`ifndef VX_CONFIG_VH
|
||||
`define VX_CONFIG_VH
|
||||
|
||||
|
||||
|
||||
`ifndef MIN
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
@ -170,8 +168,8 @@
|
|||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 2
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 1
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
@ -193,7 +191,7 @@
|
|||
`endif
|
||||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
|
||||
`endif
|
||||
|
||||
|
@ -218,7 +216,7 @@
|
|||
`endif
|
||||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
|
||||
`endif
|
||||
|
||||
|
@ -303,13 +301,13 @@
|
|||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV32 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (2)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (4)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (1024)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
|
@ -319,13 +317,13 @@
|
|||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV39 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (3)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (8)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (512)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
|
@ -604,7 +602,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef DCACHE_NUM_BANKS
|
||||
`define DCACHE_NUM_BANKS `MIN(`NUM_LSU_LANES, 4)
|
||||
`define DCACHE_NUM_BANKS `MIN(DCACHE_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -647,6 +645,15 @@
|
|||
`define DCACHE_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports
|
||||
`ifndef L1_MEM_PORTS
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_MEM_PORTS `L2_MEM_PORTS
|
||||
`else
|
||||
`define L1_MEM_PORTS `MIN(`L2_MEM_PORTS, `DCACHE_NUM_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
|
@ -674,7 +681,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L2_NUM_BANKS
|
||||
`define L2_NUM_BANKS `MIN(4, `NUM_SOCKETS)
|
||||
`define L2_NUM_BANKS `MIN(L2_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -717,6 +724,15 @@
|
|||
`define L2_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports
|
||||
`ifndef L2_MEM_PORTS
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_MEM_PORTS `MIN(`L3_MEM_PORTS, `L2_NUM_BANKS)
|
||||
`else
|
||||
`define L2_MEM_PORTS `L3_MEM_PORTS
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -726,7 +742,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||
`define L3_NUM_BANKS `MIN(L3_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -769,9 +785,13 @@
|
|||
`define L3_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
`ifndef NUM_MEM_PORTS
|
||||
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
// Number of Memory Ports
|
||||
`ifndef L3_MEM_PORTS
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_MEM_PORTS `MIN(`PLATFORM_MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
`else
|
||||
`define L3_MEM_PORTS `PLATFORM_MEMORY_BANKS
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -78,10 +78,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
|
|
@ -65,7 +65,7 @@ public:
|
|||
~vx_device() {
|
||||
#ifdef VM_ENABLE
|
||||
global_mem_.release(PAGE_TABLE_BASE_ADDR);
|
||||
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
|
||||
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
|
||||
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
|
||||
delete virtual_mem_;
|
||||
delete page_table_mem_;
|
||||
|
@ -113,10 +113,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
@ -164,7 +164,7 @@ public:
|
|||
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
|
||||
return 0;
|
||||
|
||||
// None of the conditions above are met. Return true because the address needs translation
|
||||
// None of the conditions above are met. Return true because the address needs translation
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,7 @@ public:
|
|||
#ifdef VM_ENABLE
|
||||
uint64_t pAddr = page_table_walk(dest_addr);
|
||||
// uint64_t pAddr;
|
||||
// try {
|
||||
// try {
|
||||
// pAddr = page_table_walk(dest_addr);
|
||||
// } catch ( Page_Fault_Exception ) {
|
||||
// // HW: place holder
|
||||
|
@ -466,18 +466,18 @@ public:
|
|||
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
|
||||
return err;
|
||||
});
|
||||
|
||||
|
||||
if (virtual_mem_ == nullptr) {
|
||||
// virtual_mem_ does not interfere with physical mem, so no need to free space
|
||||
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
if (VM_ADDR_MODE == BARE)
|
||||
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
|
||||
else
|
||||
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
|
||||
|
||||
|
||||
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
|
||||
return 0;
|
||||
}
|
||||
|
@ -604,7 +604,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
// Leaf node found.
|
||||
// Leaf node found.
|
||||
// Check RWX permissions according to access type.
|
||||
if (pte.r == 0)
|
||||
{
|
||||
|
|
|
@ -152,7 +152,9 @@ public:
|
|||
|
||||
// start
|
||||
device_->reset = 0;
|
||||
device_->mem_req_ready = 1;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
device_->mem_req_ready[b] = 1;
|
||||
}
|
||||
|
||||
// wait on device to go busy
|
||||
while (!device_->busy) {
|
||||
|
@ -186,11 +188,14 @@ private:
|
|||
this->dcr_bus_reset();
|
||||
|
||||
print_bufs_.clear();
|
||||
pending_mem_reqs_.clear();
|
||||
|
||||
{
|
||||
for (auto& reqs : pending_mem_reqs_) {
|
||||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_, empty);
|
||||
std::swap(dram_queue_[b], empty);
|
||||
}
|
||||
|
||||
device_->reset = 1;
|
||||
|
@ -217,17 +222,19 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
if (!dram_queue_[b].empty()) {
|
||||
auto mem_req = dram_queue_[b].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_[b].pop();
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -247,101 +254,107 @@ private:
|
|||
}
|
||||
|
||||
void mem_bus_reset() {
|
||||
device_->mem_req_ready = 0;
|
||||
device_->mem_rsp_valid = 0;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
device_->mem_req_ready[b] = 0;
|
||||
device_->mem_rsp_valid[b] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = device_->mem_rsp_ready;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
mem_rd_rsp_ready_[b] = device_->mem_rsp_ready[b];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// process memory read responses
|
||||
if (device_->mem_rsp_valid && mem_rd_rsp_ready_) {
|
||||
device_->mem_rsp_valid = 0;
|
||||
}
|
||||
if (!device_->mem_rsp_valid) {
|
||||
if (!pending_mem_reqs_.empty()
|
||||
&& (*pending_mem_reqs_.begin())->ready) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
device_->mem_rsp_valid = 1;
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_rsp->tag;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
delete mem_rsp;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
// process memory read responses
|
||||
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
|
||||
device_->mem_rsp_valid[b] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid && device_->mem_req_ready) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw) {
|
||||
auto byteen = device_->mem_req_byteen;
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
if (!device_->mem_rsp_valid[b]) {
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
&& (*pending_mem_reqs_[b].begin())->ready) {
|
||||
auto mem_rsp_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
printf("%02x", mem_rsp->data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
device_->mem_rsp_valid[b] = 1;
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[b]), mem_rsp->data.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag[b] = mem_rsp->tag;
|
||||
pending_mem_reqs_[b].erase(mem_rsp_it);
|
||||
delete mem_rsp;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr[b] * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw[b]) {
|
||||
auto byteen = device_->mem_req_byteen[b];
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[b]);
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[b];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
|
||||
// send dram request
|
||||
dram_queue_[b].push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag;
|
||||
mem_req->tag = device_->mem_req_tag[b];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
dram_queue_[b].push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag;
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -369,21 +382,21 @@ private:
|
|||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_;
|
||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
std::queue<mem_req_t*> dram_queue_;
|
||||
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_BANKS];
|
||||
|
||||
std::array<bool, PLATFORM_MEMORY_BANKS> mem_rd_rsp_ready_;
|
||||
|
||||
DramSim dram_sim_;
|
||||
|
||||
VVortex* device_;
|
||||
|
||||
RAM* ram_;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *tfp_;
|
||||
#endif
|
||||
|
||||
bool mem_rd_rsp_ready_;
|
||||
|
||||
RAM* ram_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,12 +24,12 @@ public:
|
|||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
CacheCluster(const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_caches,
|
||||
CacheCluster(const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_caches,
|
||||
uint32_t num_requests,
|
||||
const CacheSim::Config& cache_config)
|
||||
const CacheSim::Config& cache_config)
|
||||
: SimObject(ctx, name)
|
||||
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(num_requests, this))
|
||||
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(num_requests, this))
|
||||
|
@ -44,21 +44,21 @@ public:
|
|||
}
|
||||
|
||||
char sname[100];
|
||||
|
||||
std::vector<MemSwitch::Ptr> input_arbs(num_inputs);
|
||||
|
||||
std::vector<MemArbiter::Ptr> input_arbs(num_inputs);
|
||||
for (uint32_t j = 0; j < num_inputs; ++j) {
|
||||
snprintf(sname, 100, "%s-input-arb%d", name, j);
|
||||
input_arbs.at(j) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
|
||||
input_arbs.at(j) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
|
||||
for (uint32_t i = 0; i < num_requests; ++i) {
|
||||
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(j)->ReqIn.at(i));
|
||||
input_arbs.at(j)->RspIn.at(i).bind(&this->CoreRspPorts.at(j).at(i));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<MemSwitch::Ptr> mem_arbs(cache_config.num_inputs);
|
||||
std::vector<MemArbiter::Ptr> mem_arbs(cache_config.num_inputs);
|
||||
for (uint32_t i = 0; i < cache_config.num_inputs; ++i) {
|
||||
snprintf(sname, 100, "%s-mem-arb%d", name, i);
|
||||
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
|
||||
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
|
||||
for (uint32_t j = 0; j < num_inputs; ++j) {
|
||||
input_arbs.at(j)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(j));
|
||||
mem_arbs.at(i)->RspIn.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
|
||||
|
@ -66,7 +66,7 @@ public:
|
|||
}
|
||||
|
||||
snprintf(sname, 100, "%s-cache-arb", name);
|
||||
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
|
||||
auto cache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
|
||||
|
||||
for (uint32_t i = 0; i < num_caches; ++i) {
|
||||
snprintf(sname, 100, "%s-cache%d", name, i);
|
||||
|
@ -88,14 +88,14 @@ public:
|
|||
~CacheCluster() {}
|
||||
|
||||
void reset() {}
|
||||
|
||||
|
||||
void tick() {}
|
||||
|
||||
CacheSim::PerfStats perf_stats() const {
|
||||
CacheSim::PerfStats perf;
|
||||
for (auto cache : caches_) {
|
||||
perf += cache->perf_stats();
|
||||
}
|
||||
}
|
||||
return perf;
|
||||
}
|
||||
|
||||
|
|
|
@ -305,8 +305,8 @@ private:
|
|||
Config config_;
|
||||
params_t params_;
|
||||
std::vector<bank_t> banks_;
|
||||
MemSwitch::Ptr bank_switch_;
|
||||
MemSwitch::Ptr bypass_switch_;
|
||||
MemArbiter::Ptr bank_arb_;
|
||||
MemArbiter::Ptr bypass_arb_;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports_;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
|
||||
std::vector<bank_req_t> pipeline_reqs_;
|
||||
|
@ -330,33 +330,33 @@ public:
|
|||
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
|
||||
|
||||
if (config_.bypass) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
|
||||
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
|
||||
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
|
||||
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
simobject->CoreReqPorts.at(i).bind(&bypass_arb_->ReqIn.at(i));
|
||||
bypass_arb_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
}
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(simobject->name().c_str(), "l3cache")) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
|
||||
|
||||
if (config.B != 0) {
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
|
||||
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
|
||||
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
|
||||
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
} else {
|
||||
// TODO: Change this into a crossbar
|
||||
|
@ -364,45 +364,45 @@ public:
|
|||
//printf("%s connecting\n", simobject_->name().c_str());
|
||||
//3
|
||||
if (config.B != 0) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
|
||||
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, max, max);
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
|
||||
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
|
||||
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
|
||||
bypass_arb_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
|
||||
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_arb_->RspOut.at(i));
|
||||
}
|
||||
} else {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
|
||||
}
|
||||
|
||||
if (config.B != 0)
|
||||
{
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
|
||||
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
|
||||
{
|
||||
//1
|
||||
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
|
||||
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
//2
|
||||
if (config_.num_inputs > 1) {
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
|
||||
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
|
||||
bank_arb_->ReqOut.at(i % (1 << config.B)).bind(&bypass_arb_->ReqIn.at(i));
|
||||
bypass_arb_->RspIn.at(i).bind(&bank_arb_->RspOut.at(i % (1 << config.B)));
|
||||
}
|
||||
} else {
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
|
||||
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
|
||||
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -435,7 +435,7 @@ public:
|
|||
|
||||
// handle cache bypass responses
|
||||
{
|
||||
auto& bypass_port = bypass_switch_->RspIn.at(1);
|
||||
auto& bypass_port = bypass_arb_->RspIn.at(1);
|
||||
if (!bypass_port.empty()) {
|
||||
auto& mem_rsp = bypass_port.front();
|
||||
this->processBypassResponse(mem_rsp);
|
||||
|
@ -568,7 +568,7 @@ private:
|
|||
{
|
||||
MemReq mem_req(core_req);
|
||||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
|
||||
bypass_arb_->ReqIn.at(1).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
|
||||
}
|
||||
|
||||
|
@ -743,8 +743,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
|
|||
: SimObject<CacheSim>(ctx, name)
|
||||
, CoreReqPorts(config.num_inputs, this)
|
||||
, CoreRspPorts(config.num_inputs, this)
|
||||
, MemReqPorts(NUM_MEM_PORTS, this)
|
||||
, MemRspPorts(NUM_MEM_PORTS, this)
|
||||
, MemReqPorts(config.mem_ports, this)
|
||||
, MemRspPorts(config.mem_ports, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ public:
|
|||
uint8_t addr_width; // word address bits
|
||||
uint8_t ports_per_bank; // number of ports per bank
|
||||
uint8_t num_inputs; // number of inputs
|
||||
uint8_t mem_ports; // memory ports
|
||||
bool write_back; // is write-back
|
||||
bool write_reponse; // enable write response
|
||||
uint16_t mshr_size; // MSHR buffer size
|
||||
|
|
|
@ -36,10 +36,10 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
// create sockets
|
||||
|
||||
snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
|
||||
auto icache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
auto icache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
|
||||
snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
|
||||
auto dcache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
auto dcache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
|
||||
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
|
||||
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
|
||||
|
@ -48,11 +48,11 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
arch,
|
||||
dcrs);
|
||||
|
||||
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
|
||||
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
|
||||
socket->icache_mem_req_port.bind(&icache_arb->ReqIn.at(i));
|
||||
icache_arb->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
|
||||
|
||||
socket->dcache_mem_req_port.bind(&dcache_switch->ReqIn.at(i));
|
||||
dcache_switch->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
|
||||
socket->dcache_mem_req_port.bind(&dcache_arb->ReqIn.at(i));
|
||||
dcache_arb->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
|
||||
|
||||
sockets_.at(i) = socket;
|
||||
}
|
||||
|
@ -69,7 +69,8 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
log2ceil(L2_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
2, // request size
|
||||
L2_NUM_REQS, // request size
|
||||
L2_MEM_PORTS, // memory ports
|
||||
L2_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
L2_MSHR_SIZE, // mshr size
|
||||
|
@ -79,11 +80,11 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
|
||||
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
|
||||
|
||||
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
|
||||
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
|
||||
icache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
|
||||
l2cache_->CoreRspPorts.at(0).bind(&icache_arb->RspOut.at(0));
|
||||
|
||||
dcache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
|
||||
l2cache_->CoreRspPorts.at(1).bind(&dcache_switch->RspOut.at(0));
|
||||
dcache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
|
||||
l2cache_->CoreRspPorts.at(1).bind(&dcache_arb->RspOut.at(0));
|
||||
}
|
||||
|
||||
Cluster::~Cluster() {
|
||||
|
|
|
@ -27,10 +27,15 @@ inline constexpr int LSU_WORD_SIZE = (XLEN / 8);
|
|||
inline constexpr int LSU_CHANNELS = NUM_LSU_LANES;
|
||||
inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS);
|
||||
|
||||
// The dcache uses coalesced memory blocks
|
||||
inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE;
|
||||
inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE);
|
||||
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
|
||||
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
|
||||
|
||||
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
|
||||
|
||||
inline constexpr int L2_NUM_REQS = 2;
|
||||
|
||||
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS;
|
||||
|
||||
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;
|
|
@ -76,7 +76,7 @@ Core::Core(const SimContext& ctx,
|
|||
// create lsu demux
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
|
||||
lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1);
|
||||
lsu_demux_.at(i) = LocalMemSwitch::Create(sname, 1);
|
||||
}
|
||||
|
||||
// create lsu dcache adapter
|
||||
|
@ -130,7 +130,7 @@ Core::Core(const SimContext& ctx,
|
|||
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES);
|
||||
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES);
|
||||
dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES);
|
||||
|
||||
|
||||
// initialize execute units
|
||||
func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
|
||||
func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
|
||||
|
@ -141,7 +141,7 @@ Core::Core(const SimContext& ctx,
|
|||
// bind commit arbiters
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
|
||||
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
|
||||
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class Socket;
|
|||
class Arch;
|
||||
class DCRS;
|
||||
|
||||
using TraceSwitch = Mux<instr_trace_t*>;
|
||||
using TraceArbiter = Arbiter<instr_trace_t*>;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
|
@ -154,7 +154,7 @@ private:
|
|||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<FuncUnit::Ptr> func_units_;
|
||||
LocalMem::Ptr local_mem_;
|
||||
std::vector<LocalMemDemux::Ptr> lsu_demux_;
|
||||
std::vector<LocalMemSwitch::Ptr> lsu_demux_;
|
||||
std::vector<MemCoalescer::Ptr> mem_coalescers_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_dcache_adapter_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_lmem_adapter_;
|
||||
|
@ -169,7 +169,7 @@ private:
|
|||
|
||||
PerfStats perf_stats_;
|
||||
|
||||
std::vector<TraceSwitch::Ptr> commit_arbs_;
|
||||
std::vector<TraceArbiter::Ptr> commit_arbs_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
uint32_t ibuffer_idx_;
|
||||
|
|
|
@ -59,7 +59,7 @@ public:
|
|||
dram_sim_.tick();
|
||||
uint32_t counter = 0;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
for (uint32_t i = 0; i < config_.channels; ++i) {
|
||||
if (simobject_->MemReqPorts.at(i).empty())
|
||||
continue;
|
||||
|
||||
|
@ -107,8 +107,8 @@ public:
|
|||
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<MemSim>(ctx, name)
|
||||
, MemReqPorts(NUM_MEM_PORTS, this)
|
||||
, MemRspPorts(NUM_MEM_PORTS, this)
|
||||
, MemReqPorts(config.channels, this)
|
||||
, MemRspPorts(config.channels, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
|
||||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
PLATFORM_MEMORY_BANKS,
|
||||
uint32_t(arch.num_cores()) * arch.num_clusters()
|
||||
});
|
||||
|
||||
|
@ -38,7 +38,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
log2ceil(L3_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
uint8_t(arch.num_clusters()), // request size
|
||||
L3_NUM_REQS, // request size
|
||||
L3_MEM_PORTS, // memory ports
|
||||
L3_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
L3_MSHR_SIZE, // mshr size
|
||||
|
@ -47,7 +48,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
);
|
||||
|
||||
// connect L3 memory ports
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
|
||||
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
|
||||
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
|
||||
}
|
||||
|
@ -61,11 +62,11 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
}
|
||||
|
||||
// set up memory profiling
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
|
||||
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
|
|
|
@ -44,6 +44,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
1, // number of inputs
|
||||
1, // memory ports
|
||||
false, // write-back
|
||||
false, // write response
|
||||
(uint8_t)arch.num_warps(), // mshr size
|
||||
|
@ -64,6 +65,7 @@ Socket::Socket(const SimContext& ctx,
|
|||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
DCACHE_NUM_REQS, // number of inputs
|
||||
L1_MEM_PORTS, // memory ports
|
||||
DCACHE_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
DCACHE_MSHR_SIZE, // mshr size
|
||||
|
|
|
@ -15,11 +15,11 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
LocalMemDemux::LocalMemDemux(
|
||||
LocalMemSwitch::LocalMemSwitch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay
|
||||
) : SimObject<LocalMemDemux>(ctx, name)
|
||||
) : SimObject<LocalMemSwitch>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqLmem(this)
|
||||
|
@ -29,9 +29,9 @@ LocalMemDemux::LocalMemDemux(
|
|||
, delay_(delay)
|
||||
{}
|
||||
|
||||
void LocalMemDemux::reset() {}
|
||||
void LocalMemSwitch::reset() {}
|
||||
|
||||
void LocalMemDemux::tick() {
|
||||
void LocalMemSwitch::tick() {
|
||||
// process incoming responses
|
||||
if (!RspLmem.empty()) {
|
||||
auto& out_rsp = RspLmem.front();
|
||||
|
|
|
@ -466,19 +466,19 @@ private:
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Type>
|
||||
class Mux : public SimObject<Mux<Type>> {
|
||||
class Arbiter : public SimObject<Arbiter<Type>> {
|
||||
public:
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
Mux(
|
||||
Arbiter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
) : SimObject<Mux<Type>>(ctx, name)
|
||||
) : SimObject<Arbiter<Type>>(ctx, name)
|
||||
, Inputs(num_inputs, this)
|
||||
, Outputs(num_outputs, this)
|
||||
, type_(type)
|
||||
|
@ -551,7 +551,7 @@ private:
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
class Switch : public SimObject<Switch<Req, Rsp>> {
|
||||
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
@ -559,7 +559,7 @@ public:
|
|||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
Switch(
|
||||
TxArbiter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
|
@ -567,7 +567,7 @@ public:
|
|||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<Switch<Req, Rsp>>(ctx, name)
|
||||
: SimObject<TxArbiter<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
|
@ -657,11 +657,11 @@ private:
|
|||
uint32_t lg_num_reqs_;
|
||||
};
|
||||
|
||||
using MemSwitch = Switch<MemReq, MemRsp>;
|
||||
using MemArbiter = TxArbiter<MemReq, MemRsp>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LocalMemDemux : public SimObject<LocalMemDemux> {
|
||||
class LocalMemSwitch : public SimObject<LocalMemSwitch> {
|
||||
public:
|
||||
SimPort<LsuReq> ReqIn;
|
||||
SimPort<LsuRsp> RspIn;
|
||||
|
@ -672,7 +672,7 @@ public:
|
|||
SimPort<LsuReq> ReqDC;
|
||||
SimPort<LsuRsp> RspDC;
|
||||
|
||||
LocalMemDemux(
|
||||
LocalMemSwitch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay
|
||||
|
|
|
@ -142,8 +142,8 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
delete mem_alloc_[i];
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
delete mem_alloc_[b];
|
||||
}
|
||||
if (ram_) {
|
||||
delete ram_;
|
||||
|
@ -187,8 +187,8 @@ public:
|
|||
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
|
||||
|
||||
// initialize memory allocator
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
}
|
||||
|
||||
// reset the device
|
||||
|
@ -257,8 +257,9 @@ public:
|
|||
//printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset);
|
||||
device_->s_axi_ctrl_awvalid = 1;
|
||||
device_->s_axi_ctrl_awaddr = offset;
|
||||
while (!device_->s_axi_ctrl_awready)
|
||||
while (!device_->s_axi_ctrl_awready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_awvalid = 0;
|
||||
|
||||
|
@ -267,8 +268,9 @@ public:
|
|||
device_->s_axi_ctrl_wvalid = 1;
|
||||
device_->s_axi_ctrl_wdata = value;
|
||||
device_->s_axi_ctrl_wstrb = 0xf;
|
||||
while (!device_->s_axi_ctrl_wready)
|
||||
while (!device_->s_axi_ctrl_wready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_wvalid = 0;
|
||||
|
||||
|
@ -290,8 +292,9 @@ public:
|
|||
//printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset);
|
||||
device_->s_axi_ctrl_arvalid = 1;
|
||||
device_->s_axi_ctrl_araddr = offset;
|
||||
while (!device_->s_axi_ctrl_arready)
|
||||
while (!device_->s_axi_ctrl_arready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_arvalid = 0;
|
||||
|
||||
|
@ -318,9 +321,9 @@ private:
|
|||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queues_[i], empty);
|
||||
std::swap(dram_queues_[b], empty);
|
||||
}
|
||||
|
||||
device_->ap_rst_n = 0;
|
||||
|
@ -335,10 +338,10 @@ private:
|
|||
device_->ap_rst_n = 1;
|
||||
|
||||
// this AXI device is always ready to accept new requests
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
*m_axi_mem_[i].arready = 1;
|
||||
*m_axi_mem_[i].awready = 1;
|
||||
*m_axi_mem_[i].wready = 1;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
*m_axi_mem_[b].arready = 1;
|
||||
*m_axi_mem_[b].awready = 1;
|
||||
*m_axi_mem_[b].wready = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,10 +358,10 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
if (!dram_queues_[i].empty()) {
|
||||
auto mem_req = dram_queues_[i].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
if (!dram_queues_[b].empty()) {
|
||||
auto mem_req = dram_queues_[b].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
|
@ -366,7 +369,7 @@ private:
|
|||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queues_[i].pop();
|
||||
dram_queues_[b].pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue