Fully-functioning split and join instructions.

This commit is contained in:
cdkersey 2014-09-09 03:08:23 -04:00
parent 7529be422b
commit 56aaff1f87
5 changed files with 131 additions and 56 deletions

View file

@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
for (Word i = 0; i < a.getNPRegs(); ++i) {
pred[j].push_back(Reg<bool>(id, regNum++));
}
tmask.push_back(true);
}
/* Set initial register contents. */
@ -131,6 +133,11 @@ void Core::step() {
D_RAW(" (");
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
D_RAW(')' << endl);
D(3, "Thread mask:");
D_RAW(" ");
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
D_RAW(endl);
}
#endif

View file

@ -27,7 +27,7 @@ namespace Harp {
Reg &operator=(T r) { val = r; doWrite(); return *this; }
operator T() { doRead(); return val; }
operator T() const { doRead(); return val; }
void trunc(Size s) {
Word mask((~0ull >> (sizeof(Word)-s)*8));
@ -40,16 +40,32 @@ namespace Harp {
#ifdef EMU_INSTRUMENTATION
/* Access size here is 8, representing the register size of 64-bit cores. */
void doWrite() { reg_doWrite(cpuId, regNum); }
void doRead() { reg_doRead(cpuId, regNum); }
void doWrite() const { reg_doWrite(cpuId, regNum); }
void doRead() const { reg_doRead(cpuId, regNum); }
#else
void doWrite() {}
void doRead() {}
void doWrite() const {}
void doRead() const {}
#endif
};
// Entry in the IPDOM Stack.
//
// Each entry records a thread mask to install when the entry is consumed and,
// for entries that are not fall-through, a program counter to go with it.
struct DomStackEntry {
  // Builds a non-fall-through entry from predicate register `p` of every
  // thread in `m` (one inner vector of predicate registers per thread).
  // The stored mask is the complement of the predicate: thread i is enabled
  // in this entry exactly when m[i][p] reads false. `pc` is stored as given.
  // A trace line with the predicate values is written to stdout.
  DomStackEntry(
    unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
  ): fallThrough(false), pc(pc)  // listed in declaration order
  {
    tmask.reserve(m.size());
    std::cout << "New DomStackEntry:";
    for (unsigned i = 0; i < m.size(); ++i) {
      // Read the predicate register once so the trace output does not
      // register as a second read under instrumentation.
      const bool predVal = bool(m[i][p]);
      tmask.push_back(!predVal);
      std::cout << ' ' << predVal;
    }
    std::cout << std::endl;
  }

  // Builds a fall-through entry that simply remembers `tmask`.
  // Intentionally not `explicit`: a std::vector<bool> may be pushed onto a
  // stack of DomStackEntry directly. `pc` carries no meaning for this kind
  // of entry, but it is value-initialized so that copying the entry never
  // reads an indeterminate value.
  DomStackEntry(const std::vector<bool> &tmask):
    fallThrough(true), tmask(tmask), pc() {}

  bool fallThrough;         // true: only tmask is meaningful, pc is unused
  std::vector<bool> tmask;  // per-thread enable bits recorded by this entry
  Word pc;                  // program counter recorded by the first constructor
};

View file

@ -121,12 +121,16 @@ void Instruction::executeOn(Core &c) {
return;
}
/* Also throw exceptions on divergent branches. */
if (predicated && instTable[op].controlFlow) {
bool p0 = c.pred[0][pred];
for (Size t = 1; t < c.activeThreads; t++) {
if (c.pred[t][pred] != p0) throw DivergentBranchException();
/* Also throw exceptions on non-masked divergent branches. */
if (instTable[op].controlFlow) {
Size t, count, active;
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
if (c.tmask[t]) ++active;
}
if (count != 0 && count != active)
throw DivergentBranchException();
}
Size nextActiveThreads = c.activeThreads;
@ -135,8 +139,12 @@ void Instruction::executeOn(Core &c) {
for (Size t = 0; t < c.activeThreads; t++) {
vector<Reg<Word> > &reg(c.reg[t]);
vector<Reg<bool> > &pReg(c.pred[t]);
stack<DomStackEntry> &domStack(c.domStack);
if (predicated && !pReg[pred]) continue;
// If this thread is masked out, don't execute the instruction, unless it's
// a split or join.
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
op != SPLIT && op != JOIN) continue;
Word memAddr;
switch (op) {
@ -282,6 +290,23 @@ void Instruction::executeOn(Core &c) {
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
double(Float(reg[rsrc[1]], wordSz)),wordSz);
break;
case SPLIT:if (t == 0) {
// TODO: if mask becomes all-zero, fall through
DomStackEntry e(pred, c.pred, c.pc);
c.domStack.push(c.tmask);
c.domStack.push(e);
for (unsigned i = 0; i < e.tmask.size(); ++i)
c.tmask[i] = !e.tmask[i];
}
break;
case JOIN: if (t == 0) {
// TODO: if mask becomes all-zero, fall through
if (!c.domStack.top().fallThrough)
c.pc = c.domStack.top().pc;
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
}
break;
default:
cout << "ERROR: Unsupported instruction: " << *this << "\n";
exit(1);

View file

@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
4BARCH = 4b16/16/2
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
matmul-mt.s
matmul-mt.bin diverge.bin
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
matmul-mt.out
matmul-mt.out diverge.out
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d \
diverge.d diverge.4b.d
%.4b.out : %.4b.bin
$(HARPEM) -a $(4BARCH) -c $< > $@
@ -18,50 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
%.out : %.bin
$(HARPEM) -c $< > $@
2thread.bin : boot.HOF lib.HOF 2thread.HOF
$(HARPLD) -o 2thread.bin $^
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
bubble.bin : boot.HOF lib.HOF bubble.HOF
$(HARPLD) -o bubble.bin $^
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
simple.bin : boot.HOF lib.HOF simple.HOF
$(HARPLD) -o $@ $^
sieve.bin : boot.HOF lib.HOF sieve.HOF
$(HARPLD) -o $@ $^
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
$(HARPLD) -o $@ $^
matmul.bin : boot.HOF lib.HOF matmul.HOF
$(HARPLD) -o $@ $^
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
$(HARPLD) -o $@ $^
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $^
%.4b.bin : %.4b.HOF
$(HARPLD) --arch $(4BARCH) -o $@ $<
%.bin : %.HOF
$(HARPLD) -o $@ $<
%.bin : boot.HOF lib.HOF %.HOF
$(HARPLD) -o $@ $^
%.4b.HOF : %.s
$(HARPAS) --arch $(4BARCH) -o $@ $<

64
src/test/diverge.s Normal file
View file

@ -0,0 +1,64 @@
/*******************************************************************************
Harptools by Chad D. Kersey, Summer 2011
********************************************************************************
Sample HARP assembly program.
*******************************************************************************/
/* Divergent branch: test immediate postdominator branch divergence support. */
/* Number of threads exercised by this test. */
.def THREADS 8
.align 4096
.perm x
.entry
.global
/* Setup: step %r0 from 1 up to THREADS-1, issuing a clone for each value. */
/* Presumably this seeds the register state of each additional thread -- */
/* TODO confirm against the HARP ISA documentation. */
entry:
ldi %r0, #1
ldi %r1, THREADS
sloop: clone %r0
addi %r0, %r0, #1
sub %r2, %r1, %r0
rtop @p0, %r2
@p0 ? jmpi sloop
/* Reset %r0 to 0 and call dthread through jalis, passing %r1 (= THREADS) */
/* and linking through %r5. Presumably jalis activates %r1 threads for the */
/* duration of the call -- TODO confirm. */
ldi %r0, #0
jalis %r5, %r1, dthread;
/* Print loop: %r0 is a byte offset into array, %r1 the end offset */
/* (__WORD * THREADS). Each word is loaded into %r7 and printdec is called; */
/* printdec is not defined in this file (presumably it prints %r7). */
ldi %r0, #0
ldi %r1, (__WORD * THREADS)
ploop: ld %r7, %r0, array
jali %r5, printdec
addi %r0, %r0, __WORD
sub %r7, %r1, %r0
rtop @p0, %r7
@p0 ? jmpi ploop
trap;
/* dthread: runs 10 iterations (down-counter in %r1) accumulating into %r2. */
/* In each iteration @p1 is derived from the low bit of %r0: where it is set */
/* the else path subtracts %r0 from %r2, otherwise %r0 is added. The */
/* split/join pair brackets this divergent region. %r0 is presumably the */
/* per-thread id here -- TODO confirm. */
dthread: ldi %r1, #10
ldi %r2, #0
loop: andi %r3, %r0, #1
rtop @p1, %r3
@p1 ? split
@p1 ? jmpi else
add %r2, %r2, %r0
jmpi after
else: sub %r2, %r2, %r0
after: join
subi %r1, %r1, #1
rtop @p0, %r1
@p0 ? jmpi loop
/* Store the accumulator at array + (%r0 shifted left by (`__WORD)), then */
/* return through %r5. The shift amount is presumably log2 of the word size, */
/* turning %r0 into a word index -- TODO confirm. */
shli %r4, %r0, (`__WORD)
st %r2, %r4, array
jmprt %r5;
/* Result buffer written by dthread and read back by the print loop. */
.align 4096
array: .space 4096