mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor update
This commit is contained in:
parent
5ea10fd872
commit
e84f978502
8 changed files with 67 additions and 35 deletions
|
@ -155,6 +155,9 @@ void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
|||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
}
|
||||
|
||||
// wait for spawned tasks to complete
|
||||
vx_wspawn(1, 0);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -315,6 +318,9 @@ void vx_spawn_pocl_kernel(pocl_kernel_context_t * ctx, pocl_kernel_cb callback,
|
|||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
}
|
||||
|
||||
// wait for spawned tasks to complete
|
||||
vx_wspawn(1, 0);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -393,6 +393,10 @@ void Core::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
|
|||
emulator_.barrier(bar_id, count, wid);
|
||||
}
|
||||
|
||||
// Core-level entry point for a warp-spawn request.
// Passes num_warps and nextPC unchanged to emulator_.wspawn(); no other
// core state is touched here.
void Core::wspawn(uint32_t num_warps, Word nextPC) {
|
||||
emulator_.wspawn(num_warps, nextPC);
|
||||
}
|
||||
|
||||
// Hands the given RAM pointer to the emulator via emulator_.attach_ram().
void Core::attach_ram(RAM* ram) {
|
||||
emulator_.attach_ram(ram);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -56,7 +56,7 @@ public:
|
|||
uint64_t ifetch_latency;
|
||||
uint64_t load_latency;
|
||||
|
||||
PerfStats()
|
||||
PerfStats()
|
||||
: cycles(0)
|
||||
, instrs(0)
|
||||
, sched_idle(0)
|
||||
|
@ -83,10 +83,10 @@ public:
|
|||
std::vector<SimPort<MemReq>> dcache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
|
||||
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Socket* socket,
|
||||
const Arch &arch,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs);
|
||||
|
||||
~Core();
|
||||
|
@ -103,6 +103,8 @@ public:
|
|||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t wid);
|
||||
|
||||
void wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
uint32_t id() const {
|
||||
return core_id_;
|
||||
}
|
||||
|
@ -139,26 +141,26 @@ private:
|
|||
const Arch& arch_;
|
||||
|
||||
Emulator emulator_;
|
||||
|
||||
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
std::vector<Operand::Ptr> operands_;
|
||||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<FuncUnit::Ptr> func_units_;
|
||||
std::vector<FuncUnit::Ptr> func_units_;
|
||||
LocalMem::Ptr local_mem_;
|
||||
std::vector<LocalMemDemux::Ptr> lsu_demux_;
|
||||
std::vector<MemCoalescer::Ptr> mem_coalescers_;
|
||||
|
||||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
||||
|
||||
HashTable<instr_trace_t*> pending_icache_;
|
||||
uint64_t pending_instrs_;
|
||||
|
||||
uint64_t pending_ifetches_;
|
||||
|
||||
|
||||
PerfStats perf_stats_;
|
||||
|
||||
|
||||
std::vector<TraceSwitch::Ptr> commit_arbs_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
|
|
|
@ -108,6 +108,7 @@ void Emulator::clear() {
|
|||
// activate first warp and thread
|
||||
active_warps_.set(0);
|
||||
warps_[0].tmask.set(0);
|
||||
wspawn_.valid = false;
|
||||
}
|
||||
|
||||
void Emulator::attach_ram(RAM* ram) {
|
||||
|
@ -122,6 +123,19 @@ void Emulator::attach_ram(RAM* ram) {
|
|||
instr_trace_t* Emulator::step() {
|
||||
int scheduled_warp = -1;
|
||||
|
||||
// process pending wspawn
|
||||
if (wspawn_.valid && active_warps_.count() == 1) {
|
||||
DP(3, "*** Activate " << (wspawn_.num_warps-1) << " warps at PC: " << std::hex << wspawn_.nextPC);
|
||||
for (uint32_t i = 1; i < wspawn_.num_warps; ++i) {
|
||||
auto& warp = warps_.at(i);
|
||||
warp.PC = wspawn_.nextPC;
|
||||
warp.tmask.set(0);
|
||||
active_warps_.set(i);
|
||||
}
|
||||
wspawn_.valid = false;
|
||||
stalled_warps_.reset(0);
|
||||
}
|
||||
|
||||
// find next ready warp
|
||||
for (size_t wid = 0, nw = arch_.num_warps(); wid < nw; ++wid) {
|
||||
bool warp_active = active_warps_.test(wid);
|
||||
|
@ -210,19 +224,10 @@ void Emulator::resume(uint32_t wid) {
|
|||
}
|
||||
}
|
||||
|
||||
bool Emulator::wspawn(uint32_t num_warps, Word nextPC) {
|
||||
// wait for single warp
|
||||
if (active_warps_.count() != 1)
|
||||
return false;
|
||||
uint32_t active_warps = std::min<uint32_t>(num_warps, arch_.num_warps());
|
||||
DP(3, "*** Activate " << (active_warps-1) << " warps at PC: " << std::hex << nextPC);
|
||||
for (uint32_t i = 1; i < active_warps; ++i) {
|
||||
auto& warp = warps_.at(i);
|
||||
warp.PC = nextPC;
|
||||
warp.tmask.set(0);
|
||||
active_warps_.set(i);
|
||||
}
|
||||
return true;
|
||||
// Records a warp-spawn request instead of activating warps right away.
// The request is stored in wspawn_ and flagged valid; Emulator::step()
// picks it up once exactly one warp is active, activates the requested
// warps at nextPC, and clears the flag.
//   num_warps - requested warp count (clamped below)
//   nextPC    - PC the newly activated warps will start from
void Emulator::wspawn(uint32_t num_warps, Word nextPC) {
|
||||
wspawn_.valid = true;
|
||||
wspawn_.num_warps = std::min<uint32_t>(num_warps, arch_.num_warps()); // cap at the warp count reported by arch_
|
||||
wspawn_.nextPC = nextPC;
|
||||
}
|
||||
|
||||
void Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
|
||||
|
|
|
@ -50,7 +50,7 @@ public:
|
|||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t wid);
|
||||
|
||||
bool wspawn(uint32_t num_warps, Word nextPC);
|
||||
void wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
int get_exitcode() const;
|
||||
|
||||
|
@ -78,6 +78,12 @@ private:
|
|||
UUIDGenerator uui_gen;
|
||||
};
|
||||
|
||||
// Pending warp-spawn request: written by Emulator::wspawn() and consumed
// by Emulator::step().
struct wspawn_t {
|
||||
bool valid; // true while a request is waiting; step() resets it after processing
|
||||
uint32_t num_warps; // requested warp count; step() activates warps 1 .. num_warps-1
|
||||
Word nextPC; // PC assigned to each warp that step() activates
|
||||
};
|
||||
|
||||
std::shared_ptr<Instr> decode(uint32_t code) const;
|
||||
|
||||
void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace);
|
||||
|
@ -117,7 +123,8 @@ private:
|
|||
std::vector<WarpMask> barriers_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
MemoryUnit mmu_;
|
||||
Word csr_mscratch_;
|
||||
Word csr_mscratch_;
|
||||
wspawn_t wspawn_;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -257,12 +257,12 @@ void SfuUnit::tick() {
|
|||
|
||||
switch (sfu_type) {
|
||||
case SfuType::WSPAWN:
|
||||
output.push(trace, 1);
|
||||
if (trace->eop) {
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
if (!core_->emulator_.wspawn(trace_data->arg1, trace_data->arg2))
|
||||
return;
|
||||
core_->wspawn(trace_data->arg1, trace_data->arg2);
|
||||
release_warp = false;
|
||||
}
|
||||
output.push(trace, 1);
|
||||
break;
|
||||
case SfuType::TMC:
|
||||
case SfuType::SPLIT:
|
||||
|
@ -275,11 +275,11 @@ void SfuUnit::tick() {
|
|||
break;
|
||||
case SfuType::BAR: {
|
||||
output.push(trace, 1);
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
if (trace->eop) {
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
core_->barrier(trace_data->arg1, trace_data->arg2, trace->wid);
|
||||
release_warp = false;
|
||||
}
|
||||
release_warp = false;
|
||||
} break;
|
||||
case SfuType::CMOV:
|
||||
output.push(trace, 3);
|
||||
|
|
|
@ -332,9 +332,14 @@ void kernel_trigo(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
|||
auto dst_ptr = (float*)arg->dst_addr;
|
||||
auto offset = task_id * count;
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
auto a = src0_ptr[offset+i];
|
||||
auto b = src1_ptr[offset+i];
|
||||
dst_ptr[offset+i] = sinf(a * b);
|
||||
uint32_t j = offset + i;
|
||||
auto a = src0_ptr[j];
|
||||
auto b = src1_ptr[j];
|
||||
auto c = a * b;
|
||||
if ((j % 4) == 0) {
|
||||
c = sinf(c);
|
||||
}
|
||||
dst_ptr[j] = c;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -752,7 +752,10 @@ public:
|
|||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = sinf(a[i] * b[i]);
|
||||
auto ref = a[i] * b[i];
|
||||
if ((i % 4) == 0) {
|
||||
ref = sinf(ref);
|
||||
}
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue