#include "processor.h"
#include "core.h"
#include "constants.h"

#include <stdexcept>

using namespace vortex;

// Private implementation behind Processor.
//
// Owns the simulated cores, the per-cluster raster/ROP units and the memory
// hierarchy objects (optional L2 caches or arbiters per cluster, optional L3
// cache or arbiter in front of the memory simulator), and wires their
// request/response ports together at construction time.
class Processor::Impl {
private:
  // NOTE(review): the template argument lists in this file were missing from
  // the text under review and have been reconstructed from usage
  // (X::Ptr <- X::Create(...), MemReqPort/MemRspPort). Confirm the element
  // types, in particular Switch<MemReq, MemRsp> and SimPort<...>, against the
  // project headers.
  std::vector<Core::Ptr> cores_;
  std::vector<CacheSim::Ptr> l2caches_;                       // used when L2_ENABLE
  std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;  // used when !L2_ENABLE
  CacheSim::Ptr l3cache_;                                     // used when L3_ENABLE
  Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;                 // used when !L3_ENABLE and NUM_CLUSTERS > 1
  std::vector<RasterUnit::Ptr> raster_units_;
  std::vector<RopUnit::Ptr> rop_units_;
  DCRS dcrs_;

public:
  // Builds the whole processor for the given architecture.
  //
  // Throws std::invalid_argument when arch.num_cores() is not a multiple of
  // NUM_CLUSTERS: cores are assigned to clusters in equal, contiguous groups,
  // so any other core count would either divide by zero (fewer cores than
  // clusters) or index past the per-cluster units.
  Impl(const Arch& arch)
    : cores_(arch.num_cores())
    , l2caches_(NUM_CLUSTERS)
    , l2_mem_switches_(NUM_CLUSTERS)
    , raster_units_(NUM_CLUSTERS)
    , rop_units_(NUM_CLUSTERS)
  {
    const uint32_t num_cores = arch.num_cores();

    // Validate before touching the platform so a rejected configuration
    // leaves SimPlatform un-initialized (the destructor will not run).
    if ((num_cores % NUM_CLUSTERS) != 0) {
      throw std::invalid_argument("number of cores must be a multiple of NUM_CLUSTERS");
    }
    const uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;

    SimPlatform::instance().initialize();

    // create gpu blocks (one raster unit and one rop unit per cluster)
    for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
      raster_units_.at(i) = RasterUnit::Create("raster_unit", arch, dcrs_.raster_dcrs, RASTER_TILE_LOGSIZE, RASTER_BLOCK_LOGSIZE);
      rop_units_.at(i) = RopUnit::Create("rop_unit", arch, dcrs_.rop_dcrs);
    }

    // create cores; core i shares the raster/rop units of cluster i / cores_per_cluster
    for (uint32_t i = 0; i < num_cores; ++i) {
      const uint32_t cluster = i / cores_per_cluster;
      cores_.at(i) = Core::Create(i, arch, dcrs_, raster_units_.at(cluster), rop_units_.at(cluster));
    }

    // setup memory simulator
    auto memsim = MemSim::Create("dram", MemSim::Config{
      MEMORY_BANKS,
      arch.num_cores()
    });

    // Ports that the next level up (the clusters) must bind to. They start
    // as the memory simulator's own ports and are replaced by the L3 cache's
    // or the L3 arbiter's cluster-facing ports when one of those is inserted.
    std::vector<SimPort<MemReq>*> mem_req_ports(1, &memsim->MemReqPort);
    std::vector<SimPort<MemRsp>*> mem_rsp_ports(1, &memsim->MemRspPort);

    if (L3_ENABLE) {
      l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
        log2ceil(L3_CACHE_SIZE),  // C
        log2ceil(MEM_BLOCK_SIZE), // B
        2,                        // W
        0,                        // A
        32,                       // address bits
        L3_NUM_BANKS,             // number of banks
        L3_NUM_PORTS,             // number of ports
        NUM_CLUSTERS,             // request size
        true,                     // write-through
        false,                    // write response
        0,                        // victim size
        L3_MSHR_SIZE,             // mshr
        2,                        // pipeline latency
      });

      l3cache_->MemReqPort.bind(mem_req_ports.at(0));
      mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);

      mem_req_ports.resize(NUM_CLUSTERS);
      mem_rsp_ports.resize(NUM_CLUSTERS);

      for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
        mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
        mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
      }
    } else if (NUM_CLUSTERS > 1) {
      // No L3 cache: several clusters share the single memory port through
      // a round-robin arbiter.
      l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);

      l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
      mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);

      mem_req_ports.resize(NUM_CLUSTERS);
      mem_rsp_ports.resize(NUM_CLUSTERS);

      for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
        mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
        mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
      }
    }
    // Otherwise (no L3 cache, single cluster) the one cluster binds directly
    // to the memory simulator's ports.

    for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
      // Ports the cores of cluster i bind to: either the L2 cache's
      // core-facing ports or the L2 arbiter's inputs/outputs.
      std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
      std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);

      if (L2_ENABLE) {
        auto& l2cache = l2caches_.at(i);
        l2cache = CacheSim::Create("l2cache", CacheSim::Config{
          log2ceil(L2_CACHE_SIZE),                 // C
          log2ceil(MEM_BLOCK_SIZE),                // B
          2,                                       // W
          0,                                       // A
          32,                                      // address bits
          L2_NUM_BANKS,                            // number of banks
          L2_NUM_PORTS,                            // number of ports
          static_cast<uint8_t>(cores_per_cluster), // request size
          true,                                    // write-through
          false,                                   // write response
          0,                                       // victim size
          L2_MSHR_SIZE,                            // mshr
          2,                                       // pipeline latency
        });

        l2cache->MemReqPort.bind(mem_req_ports.at(i));
        mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);

        for (uint32_t j = 0; j < cores_per_cluster; ++j) {
          cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
          cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
        }
      } else {
        auto& l2_mem_switch = l2_mem_switches_.at(i);
        l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);

        l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
        mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);

        for (uint32_t j = 0; j < cores_per_cluster; ++j) {
          cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
          cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
        }
      }

      // Connect every core of this cluster to its cluster-level port pair.
      for (uint32_t j = 0; j < cores_per_cluster; ++j) {
        auto& core = cores_.at((i * cores_per_cluster) + j);
        core->MemReqPort.bind(cluster_mem_req_ports.at(j));
        cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
      }
    }
  }

  ~Impl() {
    SimPlatform::instance().finalize();
  }

  // Forwards the RAM pointer to every core, raster unit and rop unit.
  void attach_ram(RAM* ram) {
    // Iterate by reference: the handles only need to be used, not copied.
    for (const auto& core : cores_) {
      core->attach_ram(ram);
    }
    for (const auto& raster_unit : raster_units_) {
      raster_unit->attach_ram(ram);
    }
    for (const auto& rop_unit : rop_units_) {
      rop_unit->attach_ram(ram);
    }
  }

  // Resets the platform and ticks it until no core reports running(), or
  // until some core reports check_exit().
  //
  // Returns 0 when the cores simply stop running; when a core reports
  // check_exit(), returns that core's integer register 3 value.
  int run() {
    SimPlatform::instance().reset();

    int exitcode = 0;
    bool running;
    do {
      SimPlatform::instance().tick();

      running = false;
      for (auto& core : cores_) {
        if (core->running()) {
          running = true;
        }
        if (core->check_exit()) {
          // An exit request overrides any core that is still running.
          exitcode = core->getIRegValue(3);
          running = false;
          break;
        }
      }
    } while (running);

    return exitcode;
  }

  // Writes `value` to the device configuration register at `addr`.
  void write_dcr(uint32_t addr, uint64_t value) {
    dcrs_.write(addr, value);
  }
};

///////////////////////////////////////////////////////////////////////////////

// The wrappers below forward to the private implementation. impl_ is
// allocated here and released in the destructor.

Processor::Processor(const Arch& arch)
  : impl_(new Impl(arch))
{}

Processor::~Processor() {
  delete impl_;
}

void Processor::attach_ram(RAM* mem) {
  impl_->attach_ram(mem);
}

int Processor::run() {
  return impl_->run();
}

void Processor::write_dcr(uint32_t addr, uint64_t value) {
  impl_->write_dcr(addr, value);
}