// Mirror of https://github.com/vortexgpgpu/vortex.git
// Synced 2025-04-24 22:07:41 -04:00
// 204 lines, no EOL, 6.4 KiB, C++
#include "processor.h"

#include <stdexcept>

#include "core.h"
#include "constants.h"

using namespace vortex;
|
|
|
|
// Private implementation of Processor.
//
// Owns the simulated cores, the per-cluster raster/ROP units, the optional
// L2/L3 caches (or the arbiters that stand in for them when a cache level is
// disabled), and the device configuration registers. The constructor wires
// the memory ports of all of these objects together.
class Processor::Impl {
private:
  std::vector<Core::Ptr> cores_;
  std::vector<CacheSim::Ptr> l2caches_;                       // one per cluster, set only if L2_ENABLE
  std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;  // one per cluster, set only if !L2_ENABLE
  CacheSim::Ptr l3cache_;                                     // set only if L3_ENABLE
  Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;                 // set only if !L3_ENABLE and NUM_CLUSTERS > 1
  std::vector<RasterUnit::Ptr> raster_units_;                 // one per cluster
  std::vector<RopUnit::Ptr> rop_units_;                       // one per cluster
  DCRS dcrs_;

public:
  // Builds the whole simulated memory hierarchy for `arch`.
  //
  // Throws std::invalid_argument when arch.num_cores() is not a multiple of
  // NUM_CLUSTERS: the cores are split evenly across clusters, and an uneven
  // split previously ended in an integer division by zero or an out-of-range
  // cluster index part-way through construction.
  explicit Impl(const Arch& arch)
    : cores_(arch.num_cores())
    , l2caches_(NUM_CLUSTERS)
    , l2_mem_switches_(NUM_CLUSTERS)
    , raster_units_(NUM_CLUSTERS)
    , rop_units_(NUM_CLUSTERS)
  {
    uint32_t num_cores = arch.num_cores();
    uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;

    // Validate before touching the simulation platform so that a rejected
    // configuration does not leave it initialized without a matching
    // finalize() (the destructor does not run when the constructor throws).
    if ((num_cores % NUM_CLUSTERS) != 0) {
      throw std::invalid_argument("Processor: number of cores must be a multiple of NUM_CLUSTERS");
    }

    SimPlatform::instance().initialize();

    // create gpu blocks
    for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
      raster_units_.at(i) = RasterUnit::Create("raster_unit", arch, dcrs_.raster_dcrs, RASTER_TILE_LOGSIZE, RASTER_BLOCK_LOGSIZE);
      rop_units_.at(i) = RopUnit::Create("rop_unit", arch, dcrs_.rop_dcrs);
    }

    // create cores; core i shares the raster/rop units of cluster j
    for (uint32_t i = 0; i < num_cores; ++i) {
      auto j = i / cores_per_cluster;
      cores_.at(i) = Core::Create(i, arch, dcrs_, raster_units_.at(j), rop_units_.at(j));
    }

    // setup memory simulator
    auto memsim = MemSim::Create("dram", MemSim::Config{
      MEMORY_BANKS,
      arch.num_cores()
    });

    // Ports the next level down binds to. They start as the single DRAM
    // port pair and are replaced by NUM_CLUSTERS port pairs when an L3
    // cache or an L3 arbiter is inserted.
    std::vector<SimPort<MemReq>*> mem_req_ports(1, &memsim->MemReqPort);
    std::vector<SimPort<MemRsp>*> mem_rsp_ports(1, &memsim->MemRspPort);

    if (L3_ENABLE) {
      l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
        log2ceil(L3_CACHE_SIZE),  // C
        log2ceil(MEM_BLOCK_SIZE), // B
        2,                        // W
        0,                        // A
        32,                       // address bits
        L3_NUM_BANKS,             // number of banks
        L3_NUM_PORTS,             // number of ports
        NUM_CLUSTERS,             // request size
        true,                     // write-through
        false,                    // write response
        0,                        // victim size
        L3_MSHR_SIZE,             // mshr
        2,                        // pipeline latency
        }
      );
      l3cache_->MemReqPort.bind(mem_req_ports.at(0));
      mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);

      mem_req_ports.resize(NUM_CLUSTERS);
      mem_rsp_ports.resize(NUM_CLUSTERS);

      for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
        mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
        mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
      }
    } else if (NUM_CLUSTERS > 1) {
      // no L3 cache: arbitrate the clusters onto the single DRAM port pair
      l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
      l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
      mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);

      mem_req_ports.resize(NUM_CLUSTERS);
      mem_rsp_ports.resize(NUM_CLUSTERS);

      for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
        mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
        mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
      }
    }

    for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
      std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
      std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);

      if (L2_ENABLE) {
        auto& l2cache = l2caches_.at(i);
        l2cache = CacheSim::Create("l2cache", CacheSim::Config{
          log2ceil(L2_CACHE_SIZE),  // C
          log2ceil(MEM_BLOCK_SIZE), // B
          2,                        // W
          0,                        // A
          32,                       // address bits
          L2_NUM_BANKS,             // number of banks
          L2_NUM_PORTS,             // number of ports
          // NOTE(review): the cast keeps only the low 8 bits, so more than
          // 255 cores per cluster would be silently truncated here.
          static_cast<uint8_t>(cores_per_cluster), // request size
          true,                     // write-through
          false,                    // write response
          0,                        // victim size
          L2_MSHR_SIZE,             // mshr
          2,                        // pipeline latency
        });
        l2cache->MemReqPort.bind(mem_req_ports.at(i));
        mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);

        for (uint32_t j = 0; j < cores_per_cluster; ++j) {
          cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
          cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
        }
      } else {
        // no L2 cache: arbitrate the cluster's cores onto its memory port pair
        auto& l2_mem_switch = l2_mem_switches_.at(i);
        l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
        l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
        mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);

        for (uint32_t j = 0; j < cores_per_cluster; ++j) {
          cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
          cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
        }
      }

      // connect every core of cluster i to its cluster-level port pair
      for (uint32_t j = 0; j < cores_per_cluster; ++j) {
        auto& core = cores_.at((i * cores_per_cluster) + j);
        core->MemReqPort.bind(cluster_mem_req_ports.at(j));
        cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
      }
    }
  }

  // Finalizes the simulation platform that the constructor initialized.
  ~Impl() {
    SimPlatform::instance().finalize();
  }

  // Forwards `ram` to every core, raster unit and ROP unit.
  void attach_ram(RAM* ram) {
    // Iterate by reference, as run() does, to avoid copying each handle.
    for (const auto& core : cores_) {
      core->attach_ram(ram);
    }
    for (const auto& raster_unit : raster_units_) {
      raster_unit->attach_ram(ram);
    }
    for (const auto& rop_unit : rop_units_) {
      rop_unit->attach_ram(ram);
    }
  }

  // Resets the platform, then ticks it until no core reports running() or
  // until some core reports check_exit().
  //
  // Returns the value of integer register 3 of the first core whose
  // check_exit() is true, or 0 if the loop ends because no core is running.
  int run() {
    SimPlatform::instance().reset();
    bool running;
    int exitcode = 0;
    do {
      SimPlatform::instance().tick();
      running = false;
      for (auto& core : cores_) {
        if (core->running()) {
          running = true;
        }
        if (core->check_exit()) {
          // an exit request stops the simulation even if other cores are
          // still running
          exitcode = core->getIRegValue(3);
          running = false;
          break;
        }
      }
    } while (running);

    return exitcode;
  }

  // Writes `value` to the device configuration register at `addr`.
  void write_dcr(uint32_t addr, uint64_t value) {
    dcrs_.write(addr, value);
  }
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
Processor::Processor(const Arch& arch)
|
|
: impl_(new Impl(arch))
|
|
{}
|
|
|
|
// Releases the implementation object allocated by the constructor.
Processor::~Processor()
{
  delete impl_;
}
|
|
|
|
// Hands `mem` to the implementation, which attaches it to its components.
void Processor::attach_ram(RAM* mem)
{
  impl_->attach_ram(mem);
}
|
|
|
|
int Processor::run() {
|
|
return impl_->run();
|
|
}
|
|
|
|
void Processor::write_dcr(uint32_t addr, uint64_t value) {
|
|
return impl_->write_dcr(addr, value);
|
|
} |