// Mirror of https://github.com/vortexgpgpu/vortex.git (synced 2025-04-24 22:07:41 -04:00).
// Listing metadata: 302 lines, no EOL at end of file, 10 KiB, C++.
#include "cluster.h"
|
|
|
|
using namespace vortex;
|
|
|
|
/**
 * Builds one cluster and wires its memory hierarchy.
 *
 * Construction order:
 *   1. the cluster L2 cache;
 *   2. the icache, dcache, tcache, ocache and rcache cache clusters, each
 *      bound to its own L2 core port (0..4, in that order);
 *   3. the raster, ROP and texture units, bound to the rcache, ocache and
 *      tcache clusters respectively;
 *   4. one shared-memory block per core;
 *   5. the cores, each bound to its icache port and, per thread, to a
 *      SMemDemux that fans out to the dcache and to the core's shared memory.
 *
 * @param cluster_id        Index of this cluster; used in component names and
 *                          to derive global core / raster-unit indices.
 * @param cores_per_cluster Number of cores (and shared-memory blocks) to create.
 * @param processor         Stored as-is and returned by processor().
 * @param arch              Supplies num_warps() and num_threads().
 * @param dcrs              Supplies raster_dcrs / rop_dcrs / tex_dcrs and is
 *                          forwarded to every core.
 */
Cluster::Cluster(uint32_t cluster_id, uint32_t cores_per_cluster, ProcessorImpl* processor, const Arch &arch, const DCRS &dcrs)
  : cluster_id_(cluster_id)
  , cores_(cores_per_cluster)
  , raster_units_(NUM_RASTER_UNITS)
  , rop_units_(NUM_ROP_UNITS)
  , tex_units_(NUM_TEX_UNITS)
  , sharedmems_(cores_per_cluster)
  , processor_(processor)
{
  // Scratch buffer reused for every component name below.
  char sname[100];

  // NOTE(review): unlike the L1 caches below, the slot labelled "W" receives
  // the number of ways and the slot labelled "A" receives 0. This may be a
  // swapped/mislabelled pair - confirm against CacheSim::Config.
  snprintf(sname, sizeof(sname), "cluster%u-l2cache", cluster_id);
  l2cache_ = CacheSim::Create(sname, CacheSim::Config{
    !L2_ENABLED,
    log2ceil(L2_CACHE_SIZE),  // C
    log2ceil(MEM_BLOCK_SIZE), // B
    log2ceil(L2_NUM_WAYS),    // W
    0,                        // A
    32,                       // address bits
    L2_NUM_BANKS,             // number of banks
    L2_NUM_PORTS,             // number of ports
    5,                        // request size (one per client cache cluster bound below)
    true,                     // write-through
    false,                    // write response
    0,                        // victim size
    L2_MSHR_SIZE,             // mshr
    2,                        // pipeline latency
  });

  snprintf(sname, sizeof(sname), "cluster%u-icaches", cluster_id);
  icaches_ = CacheCluster::Create(sname, cores_per_cluster, NUM_ICACHES, CacheSim::Config{
    !ICACHE_ENABLED,
    log2ceil(ICACHE_SIZE),      // C
    log2ceil(L1_BLOCK_SIZE),    // B
    log2ceil(sizeof(uint32_t)), // W
    log2ceil(ICACHE_NUM_WAYS),  // A
    32,                         // address bits
    1,                          // number of banks
    1,                          // number of ports
    1,                          // number of requests
    true,                       // write-through
    false,                      // write response
    0,                          // victim size
    static_cast<uint8_t>(arch.num_warps()), // mshr
    2,                          // pipeline latency
  });

  icaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(0));
  l2cache_->CoreRspPorts.at(0).bind(&icaches_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-dcaches", cluster_id);
  dcaches_ = CacheCluster::Create(sname, cores_per_cluster, NUM_DCACHES, CacheSim::Config{
    !DCACHE_ENABLED,
    log2ceil(DCACHE_SIZE),      // C
    log2ceil(L1_BLOCK_SIZE),    // B
    log2ceil(sizeof(Word)),     // W
    log2ceil(DCACHE_NUM_WAYS),  // A
    32,                         // address bits
    DCACHE_NUM_BANKS,           // number of banks
    DCACHE_NUM_PORTS,           // number of ports
    static_cast<uint8_t>(arch.num_threads()), // number of requests
    true,                       // write-through
    false,                      // write response
    0,                          // victim size
    DCACHE_MSHR_SIZE,           // mshr
    4,                          // pipeline latency
  });

  dcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(1));
  l2cache_->CoreRspPorts.at(1).bind(&dcaches_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-tcaches", cluster_id);
  tcaches_ = CacheCluster::Create(sname, NUM_TEX_UNITS, NUM_TCACHES, CacheSim::Config{
    !TCACHE_ENABLED,
    log2ceil(TCACHE_SIZE),      // C
    log2ceil(L1_BLOCK_SIZE),    // B
    log2ceil(sizeof(uint32_t)), // W
    log2ceil(TCACHE_NUM_WAYS),  // A
    32,                         // address bits
    TCACHE_NUM_BANKS,           // number of banks
    TCACHE_NUM_PORTS,           // number of ports
    static_cast<uint8_t>(arch.num_threads()), // number of requests
    false,                      // write-through
    false,                      // write response
    0,                          // victim size
    TCACHE_MSHR_SIZE,           // mshr
    4,                          // pipeline latency
  });

  tcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(2));
  l2cache_->CoreRspPorts.at(2).bind(&tcaches_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-ocaches", cluster_id);
  ocaches_ = CacheCluster::Create(sname, NUM_ROP_UNITS, NUM_OCACHES, CacheSim::Config{
    !OCACHE_ENABLED,
    log2ceil(OCACHE_SIZE),      // C
    log2ceil(MEM_BLOCK_SIZE),   // B
    log2ceil(sizeof(uint32_t)), // W
    log2ceil(OCACHE_NUM_WAYS),  // A
    32,                         // address bits
    OCACHE_NUM_BANKS,           // number of banks
    OCACHE_NUM_PORTS,           // number of ports
    static_cast<uint8_t>(arch.num_threads()), // number of requests
    false,                      // write-through
    false,                      // write response
    0,                          // victim size
    OCACHE_MSHR_SIZE,           // mshr
    4,                          // pipeline latency
  });

  ocaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(3));
  l2cache_->CoreRspPorts.at(3).bind(&ocaches_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-rcaches", cluster_id);
  rcaches_ = CacheCluster::Create(sname, NUM_RASTER_UNITS, NUM_RCACHES, CacheSim::Config{
    !RCACHE_ENABLED,
    log2ceil(RCACHE_SIZE),      // C
    log2ceil(MEM_BLOCK_SIZE),   // B
    log2ceil(sizeof(uint32_t)), // W
    log2ceil(RCACHE_NUM_WAYS),  // A
    32,                         // address bits
    RCACHE_NUM_BANKS,           // number of banks
    RCACHE_NUM_PORTS,           // number of ports
    1,                          // number of requests
    false,                      // write-through
    false,                      // write response
    0,                          // victim size
    RCACHE_MSHR_SIZE,           // mshr
    4,                          // pipeline latency
  });

  rcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(4));
  l2cache_->CoreRspPorts.at(4).bind(&rcaches_->MemRspPort);

  ///////////////////////////////////////////////////////////////////////////

  // Cores are assigned to units in contiguous groups. The group size is
  // rounded UP so that `core / group_size` is always a valid unit index:
  // with floor division a core count that is not a multiple of the unit count
  // indexes one past the last unit, and fewer cores than units gives a group
  // size of 0 (division by zero in the core loop). For evenly divisible
  // configurations the result equals the floor quotient.
  auto ceil_div = [](uint32_t num, uint32_t den) -> uint32_t {
    return (num + den - 1) / den;
  };
  const uint32_t cores_per_raster = ceil_div(cores_per_cluster, NUM_RASTER_UNITS);
  const uint32_t cores_per_rop    = ceil_div(cores_per_cluster, NUM_ROP_UNITS);
  const uint32_t cores_per_tex    = ceil_div(cores_per_cluster, NUM_TEX_UNITS);

  // create raster units
  for (uint32_t i = 0; i < NUM_RASTER_UNITS; ++i) {
    snprintf(sname, sizeof(sname), "cluster%u-raster_unit%u", cluster_id, i);
    // Raster index that is unique across clusters.
    uint32_t raster_idx = cluster_id * NUM_RASTER_UNITS + i;
    raster_units_.at(i) = RasterUnit::Create(sname, raster_idx, cores_per_raster, arch, dcrs.raster_dcrs, RasterUnit::Config{
      RASTER_TILE_LOGSIZE,
      RASTER_BLOCK_LOGSIZE
    });
    raster_units_.at(i)->MemReqs.bind(&rcaches_->CoreReqPorts.at(i).at(0));
    rcaches_->CoreRspPorts.at(i).at(0).bind(&raster_units_.at(i)->MemRsps);
  }

  // create rop units
  for (uint32_t i = 0; i < NUM_ROP_UNITS; ++i) {
    snprintf(sname, sizeof(sname), "cluster%u-rop_unit%u", cluster_id, i);
    rop_units_.at(i) = RopUnit::Create(sname, cores_per_rop, arch, dcrs.rop_dcrs);
    for (uint32_t j = 0; j < arch.num_threads(); ++j) {
      rop_units_.at(i)->MemReqs.at(j).bind(&ocaches_->CoreReqPorts.at(i).at(j));
      ocaches_->CoreRspPorts.at(i).at(j).bind(&rop_units_.at(i)->MemRsps.at(j));
    }
  }

  // create tex units
  for (uint32_t i = 0; i < NUM_TEX_UNITS; ++i) {
    snprintf(sname, sizeof(sname), "cluster%u-tex_unit%u", cluster_id, i);
    tex_units_.at(i) = TexUnit::Create(sname, cores_per_tex, arch, dcrs.tex_dcrs, TexUnit::Config{
      2, // address latency
      6, // sampler latency
    });
    for (uint32_t j = 0; j < arch.num_threads(); ++j) {
      tex_units_.at(i)->MemReqs.at(j).bind(&tcaches_->CoreReqPorts.at(i).at(j));
      tcaches_->CoreRspPorts.at(i).at(j).bind(&tex_units_.at(i)->MemRsps.at(j));
    }
  }

  // create shared memory blocks (one per core)
  // NOTE(review): the meaning of each field is defined by SharedMem::Config,
  // which is not visible in this file.
  for (uint32_t i = 0; i < cores_per_cluster; ++i) {
    snprintf(sname, sizeof(sname), "cluster%u-shared_mem%u", cluster_id, i);
    sharedmems_.at(i) = SharedMem::Create(sname, SharedMem::Config{
      uint32_t(SMEM_LOCAL_SIZE) * arch.num_warps() * arch.num_threads(),
      SMEM_LOCAL_SIZE,
      arch.num_threads(),
      arch.num_threads(),
      log2ceil(STACK_SIZE),
      1,
      false
    });
  }

  // create cores
  for (uint32_t i = 0; i < cores_per_cluster; ++i) {
    // Unit serving this core (contiguous groups, see ceil_div above).
    uint32_t raster_idx = i / cores_per_raster;
    uint32_t rop_idx = i / cores_per_rop;
    uint32_t tex_idx = i / cores_per_tex;

    // Core id that is unique across clusters.
    uint32_t core_id = cluster_id * cores_per_cluster + i;

    cores_.at(i) = Core::Create(core_id,
                                this,
                                arch,
                                dcrs,
                                sharedmems_.at(i),
                                raster_units_.at(raster_idx),
                                rop_units_.at(rop_idx),
                                tex_units_.at(tex_idx));

    cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
    icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));

    // Per thread: core dcache port <-> demux <-> { dcache, shared memory }.
    for (uint32_t j = 0; j < arch.num_threads(); ++j) {
      snprintf(sname, sizeof(sname), "cluster%u-smem_demux%u_%u", cluster_id, i, j);
      auto smem_demux = SMemDemux::Create(sname);

      cores_.at(i)->dcache_req_ports.at(j).bind(&smem_demux->ReqIn);
      smem_demux->RspIn.bind(&cores_.at(i)->dcache_rsp_ports.at(j));

      smem_demux->ReqDc.bind(&dcaches_->CoreReqPorts.at(i).at(j));
      dcaches_->CoreRspPorts.at(i).at(j).bind(&smem_demux->RspDc);

      smem_demux->ReqSm.bind(&sharedmems_.at(i)->Inputs.at(j));
      sharedmems_.at(i)->Outputs.at(j).bind(&smem_demux->RspSm);
    }
  }
}
|
|
|
|
// Nothing to release explicitly; members clean up through their own destructors.
Cluster::~Cluster() = default;
|
|
|
|
/**
 * Forwards the RAM to every component of this cluster that exposes
 * attach_ram(): all cores, then all raster, ROP and texture units.
 *
 * @param ram RAM instance handed unchanged to each component.
 */
void Cluster::attach_ram(RAM* ram) {
  // Elements are bound by reference (as running() and getIRegValue() already
  // do) so that iterating does not copy each handle.
  for (auto& core : cores_) {
    core->attach_ram(ram);
  }
  for (auto& raster_unit : raster_units_) {
    raster_unit->attach_ram(ram);
  }
  for (auto& rop_unit : rop_units_) {
    rop_unit->attach_ram(ram);
  }
  for (auto& tex_unit : tex_units_) {
    tex_unit->attach_ram(ram);
  }
}
|
|
|
|
bool Cluster::running() const {
|
|
for (auto& core : cores_) {
|
|
if (core->running())
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool Cluster::getIRegValue(int* value, int reg) const {
|
|
for (auto& core : cores_) {
|
|
if (core->check_exit()) {
|
|
*value = core->getIRegValue(reg);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
 * Connects the cluster's L2 cache to an external memory interface.
 *
 * @param mem_req_port Port that the L2 memory-request port is bound to.
 * @param mem_rsp_port Port that is bound to the L2 memory-response port.
 */
void Cluster::bind(SimPort<MemReq>* mem_req_port, SimPort<MemRsp>* mem_rsp_port) {
  // Requests: L2 -> external port.
  l2cache_->MemReqPort.bind(mem_req_port);

  // Responses: external port -> L2.
  mem_rsp_port->bind(&l2cache_->MemRspPort);
}
|
|
|
|
/**
 * @return The ProcessorImpl pointer that was passed to the constructor.
 */
ProcessorImpl* Cluster::processor() const {
  return processor_;
}
|
|
|
|
/**
 * Collects the performance counters of this cluster.
 *
 * The cache fields are taken directly from each cache (cluster); the
 * shared-memory, raster, ROP and texture fields are accumulated with `+=`
 * over every instance owned by the cluster.
 *
 * @return A PerfStats value holding the collected counters.
 */
Cluster::PerfStats Cluster::perf_stats() const {
  Cluster::PerfStats perf;

  perf.icache  = icaches_->perf_stats();
  perf.dcache  = dcaches_->perf_stats();
  perf.tcache  = tcaches_->perf_stats();
  perf.ocache  = ocaches_->perf_stats();
  perf.rcache  = rcaches_->perf_stats();
  perf.l2cache = l2cache_->perf_stats();

  // Elements are bound by reference so that iterating does not copy each
  // handle. The unit containers are sized with NUM_RASTER_UNITS /
  // NUM_ROP_UNITS / NUM_TEX_UNITS in the constructor, so walking the
  // containers visits the same elements as indexing up to those constants.
  for (auto& sharedmem : sharedmems_) {
    perf.sharedmem += sharedmem->perf_stats();
  }

  for (auto& raster_unit : raster_units_) {
    perf.raster_unit += raster_unit->perf_stats();
  }

  for (auto& rop_unit : rop_units_) {
    perf.rop_unit += rop_unit->perf_stats();
  }

  for (auto& tex_unit : tex_units_) {
    perf.tex_unit += tex_unit->perf_stats();
  }

  return perf;
}