mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-22 21:09:15 -04:00
global barrier fixes
This commit is contained in:
parent
ececadd77b
commit
24471d11c1
6 changed files with 33 additions and 39 deletions
|
@ -215,7 +215,7 @@ module VX_warp_sched #(
|
|||
assign gbar_if.req_valid = gbar_req_valid;
|
||||
assign gbar_if.req_id = gbar_req_id;
|
||||
assign gbar_if.req_size_m1 = gbar_req_size_m1;
|
||||
assign gbar_if.req_core_id = NC_WIDTH'(CORE_ID);
|
||||
assign gbar_if.req_core_id = NC_WIDTH'(CORE_ID % `NUM_CORES);
|
||||
|
||||
// split/join stack management
|
||||
|
||||
|
|
|
@ -12,7 +12,8 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
, mem_req_port(this)
|
||||
, mem_rsp_port(this)
|
||||
, cluster_id_(cluster_id)
|
||||
, cores_(num_cores)
|
||||
, cores_(num_cores)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, raster_units_(NUM_RASTER_UNITS)
|
||||
, rop_units_(NUM_ROP_UNITS)
|
||||
, tex_units_(NUM_TEX_UNITS)
|
||||
|
@ -20,7 +21,6 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
, processor_(processor)
|
||||
{
|
||||
char sname[100];
|
||||
|
||||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||
!L2_ENABLED,
|
||||
|
@ -257,8 +257,10 @@ Cluster::~Cluster() {
|
|||
//--
|
||||
}
|
||||
|
||||
void Cluster::reset() {
|
||||
//--
|
||||
void Cluster::reset() {
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void Cluster::tick() {
|
||||
|
@ -303,6 +305,26 @@ bool Cluster::check_exit(Word* exitcode, int reg) const {
|
|||
return done;
|
||||
}
|
||||
|
||||
void Cluster::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
|
||||
uint32_t local_core_id = core_id % cores_.size();
|
||||
barrier.set(local_core_id);
|
||||
|
||||
DP(3, "*** Suspend core #" << core_id << " at barrier #" << bar_id);
|
||||
|
||||
if (barrier.count() == (size_t)count) {
|
||||
// resume all suspended cores
|
||||
for (uint32_t i = 0; i < cores_.size(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
|
||||
cores_.at(i)->resume();
|
||||
}
|
||||
}
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the ProcessorImpl pointer held in processor_.
ProcessorImpl* Cluster::processor() const {
  return this->processor_;
}
|
||||
|
|
|
@ -64,19 +64,18 @@ public:
|
|||
|
||||
bool running() const;
|
||||
|
||||
bool check_exit(Word* exitcode, int reg) const;
|
||||
bool check_exit(Word* exitcode, int reg) const;
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
ProcessorImpl* processor() const;
|
||||
|
||||
Cluster::PerfStats perf_stats() const;
|
||||
|
||||
auto& core(uint32_t index) {
|
||||
return cores_.at(index);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t cluster_id_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
std::vector<RasterUnit::Ptr> raster_units_;
|
||||
std::vector<RopUnit::Ptr> rop_units_;
|
||||
std::vector<TexUnit::Ptr> tex_units_;
|
||||
|
|
|
@ -326,7 +326,7 @@ void Core::barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id) {
|
|||
if (is_global) {
|
||||
// global barrier handling
|
||||
if (barrier.count() == active_warps_.count()) {
|
||||
cluster_->processor()->barrier(bar_idx, count, core_id_);
|
||||
cluster_->barrier(bar_idx, count, core_id_);
|
||||
barrier.reset();
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -6,7 +6,6 @@ using namespace vortex;
|
|||
ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||
: arch_(arch)
|
||||
, clusters_(NUM_CLUSTERS)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
{
|
||||
SimPlatform::instance().initialize();
|
||||
|
||||
|
@ -100,35 +99,12 @@ int ProcessorImpl::run() {
|
|||
}
|
||||
|
||||
void ProcessorImpl::reset() {
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
perf_mem_reads_ = 0;
|
||||
perf_mem_writes_ = 0;
|
||||
perf_mem_latency_ = 0;
|
||||
perf_mem_pending_reads_ = 0;
|
||||
}
|
||||
|
||||
void ProcessorImpl::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
barrier.set(core_id);
|
||||
DP(3, "*** Suspend core #" << core_id << " at barrier #" << bar_id);
|
||||
|
||||
if (barrier.count() == (size_t)count) {
|
||||
// resume suspended cores
|
||||
uint32_t cores_per_cluster = arch_.num_cores() / NUM_CLUSTERS;
|
||||
for (uint32_t i = 0; i < arch_.num_cores(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
|
||||
uint32_t core_idx = i % cores_per_cluster;
|
||||
uint32_t cluster_idx = i / cores_per_cluster;
|
||||
clusters_.at(cluster_idx)->core(core_idx)->resume();
|
||||
}
|
||||
}
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessorImpl::write_dcr(uint32_t addr, uint32_t value) {
|
||||
dcrs_.write(addr, value);
|
||||
}
|
||||
|
|
|
@ -31,8 +31,6 @@ public:
|
|||
|
||||
int run();
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
ProcessorImpl::PerfStats perf_stats() const;
|
||||
|
@ -46,7 +44,6 @@ private:
|
|||
DCRS dcrs_;
|
||||
MemSim::Ptr memsim_;
|
||||
CacheSim::Ptr l3cache_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
uint64_t perf_mem_reads_;
|
||||
uint64_t perf_mem_writes_;
|
||||
uint64_t perf_mem_latency_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue