mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Fully-functioning spawn and join instructions.
This commit is contained in:
parent
7529be422b
commit
56aaff1f87
5 changed files with 131 additions and 56 deletions
|
@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
|
|||
for (Word i = 0; i < a.getNPRegs(); ++i) {
|
||||
pred[j].push_back(Reg<bool>(id, regNum++));
|
||||
}
|
||||
|
||||
tmask.push_back(true);
|
||||
}
|
||||
|
||||
/* Set initial register contents. */
|
||||
|
@ -131,6 +133,11 @@ void Core::step() {
|
|||
D_RAW(" (");
|
||||
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
||||
D_RAW(')' << endl);
|
||||
|
||||
D(3, "Thread mask:");
|
||||
D_RAW(" ");
|
||||
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
|
||||
D_RAW(endl);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace Harp {
|
|||
|
||||
Reg &operator=(T r) { val = r; doWrite(); return *this; }
|
||||
|
||||
operator T() { doRead(); return val; }
|
||||
operator T() const { doRead(); return val; }
|
||||
|
||||
void trunc(Size s) {
|
||||
Word mask((~0ull >> (sizeof(Word)-s)*8));
|
||||
|
@ -40,16 +40,32 @@ namespace Harp {
|
|||
|
||||
#ifdef EMU_INSTRUMENTATION
|
||||
/* Access size here is 8, representing the register size of 64-bit cores. */
|
||||
void doWrite() { reg_doWrite(cpuId, regNum); }
|
||||
void doRead() { reg_doRead(cpuId, regNum); }
|
||||
void doWrite() const { reg_doWrite(cpuId, regNum); }
|
||||
void doRead() const { reg_doRead(cpuId, regNum); }
|
||||
#else
|
||||
void doWrite() {}
|
||||
void doRead() {}
|
||||
void doWrite() const {}
|
||||
void doRead() const {}
|
||||
#endif
|
||||
};
|
||||
|
||||
// Entry in the IPDOM Stack
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(
|
||||
unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
|
||||
): pc(pc), fallThrough(false)
|
||||
{
|
||||
std::cout << "New DomStackEntry:";
|
||||
for (unsigned i = 0; i < m.size(); ++i) {
|
||||
tmask.push_back(!bool(m[i][p]));
|
||||
std::cout << ' ' << bool(m[i][p]);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
DomStackEntry(const std::vector<bool> &tmask):
|
||||
tmask(tmask), fallThrough(true) {}
|
||||
|
||||
bool fallThrough;
|
||||
std::vector<bool> tmask;
|
||||
Word pc;
|
||||
};
|
||||
|
|
|
@ -121,12 +121,16 @@ void Instruction::executeOn(Core &c) {
|
|||
return;
|
||||
}
|
||||
|
||||
/* Also throw exceptions on divergent branches. */
|
||||
if (predicated && instTable[op].controlFlow) {
|
||||
bool p0 = c.pred[0][pred];
|
||||
for (Size t = 1; t < c.activeThreads; t++) {
|
||||
if (c.pred[t][pred] != p0) throw DivergentBranchException();
|
||||
/* Also throw exceptions on non-masked divergent branches. */
|
||||
if (instTable[op].controlFlow) {
|
||||
Size t, count, active;
|
||||
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
|
||||
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
|
||||
if (c.tmask[t]) ++active;
|
||||
}
|
||||
|
||||
if (count != 0 && count != active)
|
||||
throw DivergentBranchException();
|
||||
}
|
||||
|
||||
Size nextActiveThreads = c.activeThreads;
|
||||
|
@ -135,8 +139,12 @@ void Instruction::executeOn(Core &c) {
|
|||
for (Size t = 0; t < c.activeThreads; t++) {
|
||||
vector<Reg<Word> > &reg(c.reg[t]);
|
||||
vector<Reg<bool> > &pReg(c.pred[t]);
|
||||
stack<DomStackEntry> &domStack(c.domStack);
|
||||
|
||||
if (predicated && !pReg[pred]) continue;
|
||||
// If this thread is masked out, don't execute the instruction, unless it's
|
||||
// a split or join.
|
||||
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
|
||||
op != SPLIT && op != JOIN) continue;
|
||||
|
||||
Word memAddr;
|
||||
switch (op) {
|
||||
|
@ -282,6 +290,23 @@ void Instruction::executeOn(Core &c) {
|
|||
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
|
||||
double(Float(reg[rsrc[1]], wordSz)),wordSz);
|
||||
break;
|
||||
case SPLIT:if (t == 0) {
|
||||
// TODO: if mask becomes all-zero, fall through
|
||||
DomStackEntry e(pred, c.pred, c.pc);
|
||||
c.domStack.push(c.tmask);
|
||||
c.domStack.push(e);
|
||||
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||
c.tmask[i] = !e.tmask[i];
|
||||
}
|
||||
break;
|
||||
case JOIN: if (t == 0) {
|
||||
// TODO: if mask becomes all-zero, fall through
|
||||
if (!c.domStack.top().fallThrough)
|
||||
c.pc = c.domStack.top().pc;
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: Unsupported instruction: " << *this << "\n";
|
||||
exit(1);
|
||||
|
|
|
@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
|
|||
4BARCH = 4b16/16/2
|
||||
|
||||
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
|
||||
matmul-mt.s
|
||||
matmul-mt.bin diverge.bin
|
||||
|
||||
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
|
||||
matmul-mt.out
|
||||
matmul-mt.out diverge.out
|
||||
|
||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
|
||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
|
||||
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d \
|
||||
diverge.d diverge.4b.d
|
||||
|
||||
%.4b.out : %.4b.bin
|
||||
$(HARPEM) -a $(4BARCH) -c $< > $@
|
||||
|
@ -18,50 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
|
|||
%.out : %.bin
|
||||
$(HARPEM) -c $< > $@
|
||||
|
||||
2thread.bin : boot.HOF lib.HOF 2thread.HOF
|
||||
$(HARPLD) -o 2thread.bin $^
|
||||
|
||||
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
|
||||
|
||||
bubble.bin : boot.HOF lib.HOF bubble.HOF
|
||||
$(HARPLD) -o bubble.bin $^
|
||||
|
||||
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
|
||||
|
||||
simple.bin : boot.HOF lib.HOF simple.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
sieve.bin : boot.HOF lib.HOF sieve.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
matmul.bin : boot.HOF lib.HOF matmul.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
|
||||
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
%.4b.bin : %.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $<
|
||||
|
||||
%.bin : %.HOF
|
||||
$(HARPLD) -o $@ $<
|
||||
%.bin : boot.HOF lib.HOF %.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
%.4b.HOF : %.s
|
||||
$(HARPAS) --arch $(4BARCH) -o $@ $<
|
||||
|
|
64
src/test/diverge.s
Normal file
64
src/test/diverge.s
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*******************************************************************************
|
||||
Harptools by Chad D. Kersey, Summer 2011
|
||||
********************************************************************************
|
||||
|
||||
Sample HARP assembly program.
|
||||
|
||||
*******************************************************************************/
|
||||
/* Divergent branch: test immediate postdominator branch divergence support. */
|
||||
.def THREADS 8
|
||||
|
||||
.align 4096
|
||||
.perm x
|
||||
.entry
|
||||
.global
|
||||
entry:
|
||||
ldi %r0, #1
|
||||
ldi %r1, THREADS
|
||||
sloop: clone %r0
|
||||
|
||||
addi %r0, %r0, #1
|
||||
sub %r2, %r1, %r0
|
||||
rtop @p0, %r2
|
||||
@p0 ? jmpi sloop
|
||||
|
||||
ldi %r0, #0
|
||||
jalis %r5, %r1, dthread;
|
||||
|
||||
ldi %r0, #0
|
||||
ldi %r1, (__WORD * THREADS)
|
||||
|
||||
ploop: ld %r7, %r0, array
|
||||
jali %r5, printdec
|
||||
|
||||
addi %r0, %r0, __WORD
|
||||
sub %r7, %r1, %r0
|
||||
rtop @p0, %r7
|
||||
@p0 ? jmpi ploop
|
||||
|
||||
trap;
|
||||
|
||||
|
||||
dthread: ldi %r1, #10
|
||||
ldi %r2, #0
|
||||
|
||||
loop: andi %r3, %r0, #1
|
||||
rtop @p1, %r3
|
||||
@p1 ? split
|
||||
@p1 ? jmpi else
|
||||
add %r2, %r2, %r0
|
||||
jmpi after
|
||||
else: sub %r2, %r2, %r0
|
||||
after: join
|
||||
|
||||
subi %r1, %r1, #1
|
||||
rtop @p0, %r1
|
||||
@p0 ? jmpi loop
|
||||
|
||||
shli %r4, %r0, (`__WORD)
|
||||
st %r2, %r4, array
|
||||
|
||||
jmprt %r5;
|
||||
|
||||
.align 4096
|
||||
array: .space 4096
|
Loading…
Add table
Add a link
Reference in a new issue