mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Merge branch 'harmonica-iset' of https://github.com/cdkersey/harptool into harmonica-iset
This commit is contained in:
commit
f544ca1e49
8 changed files with 158 additions and 59 deletions
|
@ -132,7 +132,8 @@ The bit fields in the instruction encodings depend heavily on this quality.
|
|||
30 "skep" 1REG 31 "reti" NONE 32 "tlbrm" 1REG
|
||||
33 "itof" 2REG 34 "ftoi" 2REG 35 "fadd" 3REG
|
||||
36 "fsub" 3REG 37 "fmul" 3REG 38 "fdiv" 3REG
|
||||
39 "fneg" 2REG
|
||||
39 "fneg" 2REG 3a "wspawn" 2REG 3b "split" NONE
|
||||
3c "join" NONE
|
||||
\end{verbatim}
|
||||
|
||||
\subsection{Word Encoding}
|
||||
|
@ -346,6 +347,8 @@ format, which can be fixed point or floating point.
|
|||
\texttt{jalis} \%link, \%n, \textsc{\#RelDest}&Jump and link immediate, spawning N active lanes.\\
|
||||
\texttt{jalrs} \%link, \%n, \%dest&Jump and link indirect, spawning N active lanes.\\
|
||||
\texttt{jmprt} \%addr&Jump indirect, terminating execution on all but a single lane.\\
|
||||
\texttt{split}&Control flow diverge.\\
|
||||
\texttt{join}&Control flow reconverge.\\
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
|
@ -412,6 +415,14 @@ The current response to this is to trap to the operating system (interrupt numbe
|
|||
The \texttt{clone}, \texttt{jalis}, \texttt{jalrs}, and \texttt{jmprt} instructions form the basis of SIMD context control in the HARP instruction set.
|
||||
Context is created using \texttt{clone}; the waiting threads are then spawned using \texttt{jalrs} or \texttt{jalis}, ``jump-and-link register/immediate and spawn''; and finally the parallel section returns using \texttt{jmprt}, ``jump register and terminate'', best thought of as ``return and terminate.''
|
||||
|
||||
There are times when a control flow operation will need to be predicated, going one direction on some lanes and the other direction on other lanes.
|
||||
For this, the HARP instruction set provides the \texttt{split} and \texttt{join} instructions.
|
||||
When a predicated \texttt{split} is first encountered, only the lanes for which the \texttt{split}'s predicate is true are allowed to continue.
|
||||
The other lanes are masked out until the corresponding \texttt{join} is encountered.
|
||||
The first time \texttt{join} is reached, control flow returns to the instruction following the corresponding \texttt{split} with the set of masked-out lanes complemented.
|
||||
The second time the same \texttt{join} is reached, control flow falls through and the original lane mask is restored.
|
||||
A hardware stack is maintained to keep track of nested \texttt{split}s.
|
||||
|
||||
\section{Default I/O Devices}
|
||||
The emulator currently only supports a single I/O device, simple console I/O.
|
||||
Writing to the address \texttt{0x800...0} (an address with its MSB set and all other bits cleared) causes text to be written to the display.
|
||||
|
|
|
@ -8,7 +8,7 @@ PREFIX ?= /usr/local
|
|||
|
||||
LIB_OBJS=args.o obj.o mem.o core.o instruction.o enc.o util.o lex.yy.o
|
||||
|
||||
all: harptool libharplib.so libharplib.a libqsim-harp.so
|
||||
all: harptool libharplib.so libharplib.a # libqsim-harp.so
|
||||
|
||||
# Use -static so we don't have to install the library in order to just run
|
||||
# Harptool.
|
||||
|
|
|
@ -46,6 +46,8 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id) :
|
|||
for (Word i = 0; i < a.getNPRegs(); ++i) {
|
||||
pred[j].push_back(Reg<bool>(id, regNum++));
|
||||
}
|
||||
|
||||
tmask.push_back(true);
|
||||
}
|
||||
|
||||
/* Set initial register contents. */
|
||||
|
@ -131,6 +133,11 @@ void Core::step() {
|
|||
D_RAW(" (");
|
||||
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
||||
D_RAW(')' << endl);
|
||||
|
||||
D(3, "Thread mask:");
|
||||
D_RAW(" ");
|
||||
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
|
||||
D_RAW(endl);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
|
@ -26,7 +27,7 @@ namespace Harp {
|
|||
|
||||
Reg &operator=(T r) { val = r; doWrite(); return *this; }
|
||||
|
||||
operator T() { doRead(); return val; }
|
||||
operator T() const { doRead(); return val; }
|
||||
|
||||
void trunc(Size s) {
|
||||
Word mask((~0ull >> (sizeof(Word)-s)*8));
|
||||
|
@ -39,14 +40,36 @@ namespace Harp {
|
|||
|
||||
#ifdef EMU_INSTRUMENTATION
|
||||
/* Access size here is 8, representing the register size of 64-bit cores. */
|
||||
void doWrite() { reg_doWrite(cpuId, regNum); }
|
||||
void doRead() { reg_doRead(cpuId, regNum); }
|
||||
void doWrite() const { reg_doWrite(cpuId, regNum); }
|
||||
void doRead() const { reg_doRead(cpuId, regNum); }
|
||||
#else
|
||||
void doWrite() {}
|
||||
void doRead() {}
|
||||
void doWrite() const {}
|
||||
void doRead() const {}
|
||||
#endif
|
||||
};
|
||||
|
||||
// Entry in the IPDOM Stack
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(
|
||||
unsigned p, const std::vector<std::vector<Reg<bool> > >& m, Word pc
|
||||
): pc(pc), fallThrough(false)
|
||||
{
|
||||
std::cout << "New DomStackEntry:";
|
||||
for (unsigned i = 0; i < m.size(); ++i) {
|
||||
tmask.push_back(!bool(m[i][p]));
|
||||
std::cout << ' ' << bool(m[i][p]);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
DomStackEntry(const std::vector<bool> &tmask):
|
||||
tmask(tmask), fallThrough(true) {}
|
||||
|
||||
bool fallThrough;
|
||||
std::vector<bool> tmask;
|
||||
Word pc;
|
||||
};
|
||||
|
||||
class Core {
|
||||
public:
|
||||
Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id=0);
|
||||
|
@ -67,6 +90,9 @@ namespace Harp {
|
|||
std::vector<std::vector<Reg<Word> > > reg;
|
||||
std::vector<std::vector<Reg<bool> > > pred;
|
||||
|
||||
std::vector<bool> tmask;
|
||||
std::stack<DomStackEntry> domStack;
|
||||
|
||||
std::vector<Word> shadowReg;
|
||||
std::vector<bool> shadowPReg;
|
||||
|
||||
|
|
|
@ -30,7 +30,8 @@ namespace Harp {
|
|||
JALI, JALR, JMPI, JMPR, CLONE, JALIS, JALRS,
|
||||
JMPRT, LD, ST, LDI, RTOP, ANDP, ORP, XORP, NOTP, ISNEG,
|
||||
ISZERO, HALT, TRAP, JMPRU, SKEP, RETI, TLBRM,
|
||||
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG, WSPAWN };
|
||||
ITOF, FTOI, FADD, FSUB, FMUL, FDIV, FNEG, WSPAWN,
|
||||
SPLIT, JOIN };
|
||||
enum ArgClass {
|
||||
AC_NONE, AC_2REG, AC_2IMM, AC_3REG, AC_3PREG, AC_3IMM, AC_3REGSRC,
|
||||
AC_1IMM, AC_1REG, AC_3IMMSRC, AC_PREG_REG, AC_2PREG, AC_2REGSRC
|
||||
|
|
|
@ -81,6 +81,8 @@ Instruction::InstTableEntry Instruction::instTable[] = {
|
|||
{"fdiv", false, false, false, false, AC_3REG, ITYPE_FPDIV },
|
||||
{"fneg", false, false, false, false, AC_2REG, ITYPE_FPBASIC },
|
||||
{"wspawn", false, false, true, false, AC_2REGSRC, ITYPE_NULL },
|
||||
{"split", false, false, true, false, AC_NONE, ITYPE_NULL },
|
||||
{"join", false, false, true, false, AC_NONE, ITYPE_NULL },
|
||||
{NULL,false,false,false,false,AC_NONE,ITYPE_NULL}/////// End of table.
|
||||
};
|
||||
|
||||
|
@ -119,12 +121,16 @@ void Instruction::executeOn(Core &c) {
|
|||
return;
|
||||
}
|
||||
|
||||
/* Also throw exceptions on divergent branches. */
|
||||
if (predicated && instTable[op].controlFlow) {
|
||||
bool p0 = c.pred[0][pred];
|
||||
for (Size t = 1; t < c.activeThreads; t++) {
|
||||
if (c.pred[t][pred] != p0) throw DivergentBranchException();
|
||||
/* Also throw exceptions on non-masked divergent branches. */
|
||||
if (instTable[op].controlFlow) {
|
||||
Size t, count, active;
|
||||
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
|
||||
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
|
||||
if (c.tmask[t]) ++active;
|
||||
}
|
||||
|
||||
if (count != 0 && count != active)
|
||||
throw DivergentBranchException();
|
||||
}
|
||||
|
||||
Size nextActiveThreads = c.activeThreads;
|
||||
|
@ -133,8 +139,12 @@ void Instruction::executeOn(Core &c) {
|
|||
for (Size t = 0; t < c.activeThreads; t++) {
|
||||
vector<Reg<Word> > ®(c.reg[t]);
|
||||
vector<Reg<bool> > &pReg(c.pred[t]);
|
||||
stack<DomStackEntry> &domStack(c.domStack);
|
||||
|
||||
if (predicated && !pReg[pred]) continue;
|
||||
// If this thread is masked out, don't execute the instruction, unless it's
|
||||
// a split or join.
|
||||
if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
|
||||
op != SPLIT && op != JOIN) continue;
|
||||
|
||||
Word memAddr;
|
||||
switch (op) {
|
||||
|
@ -288,6 +298,23 @@ void Instruction::executeOn(Core &c) {
|
|||
case FDIV: reg[rdest] = Float(double(Float(reg[rsrc[0]], wordSz)) /
|
||||
double(Float(reg[rsrc[1]], wordSz)),wordSz);
|
||||
break;
|
||||
case SPLIT:if (t == 0) {
|
||||
// TODO: if mask becomes all-zero, fall through
|
||||
DomStackEntry e(pred, c.pred, c.pc);
|
||||
c.domStack.push(c.tmask);
|
||||
c.domStack.push(e);
|
||||
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||
c.tmask[i] = !e.tmask[i];
|
||||
}
|
||||
break;
|
||||
case JOIN: if (t == 0) {
|
||||
// TODO: if mask becomes all-zero, fall through
|
||||
if (!c.domStack.top().fallThrough)
|
||||
c.pc = c.domStack.top().pc;
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: Unsupported instruction: " << *this << "\n";
|
||||
exit(1);
|
||||
|
|
|
@ -5,12 +5,14 @@ HARPDIS = ../harptool -D
|
|||
4BARCH = 4b16/16/2
|
||||
|
||||
all: simple.bin sieve.bin 2thread.bin simple.4b.bin sieve.4b.bin 2thread.4b.bin bubble.bin bubble.4b.bin dotprod.bin dotprod.4b.bin matmul.bin matmul.4b.bin \
|
||||
matmul-mt.s
|
||||
matmul-mt.bin diverge.bin
|
||||
|
||||
run: simple.out sieve.out 2thread.out simple.4b.out sieve.4b.out 2thread.4b.out bubble.out bubble.4b.out dotprod.out dotprod.4b.out matmul.out matmul.4b.out\
|
||||
matmul-mt.out
|
||||
matmul-mt.out diverge.out
|
||||
|
||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d
|
||||
disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d \
|
||||
bubble.4b.d dotprod.d dotprod.4b.d matmul.d matmul.4b.d matmul-mt.d \
|
||||
diverge.d diverge.4b.d
|
||||
|
||||
%.4b.out : %.4b.bin
|
||||
$(HARPEM) -a $(4BARCH) -c $< > $@
|
||||
|
@ -18,50 +20,11 @@ disas: simple.d sieve.d 2thread.d simple.4b.d sieve.4b.d 2thread.4b.d bubble.d b
|
|||
%.out : %.bin
|
||||
$(HARPEM) -c $< > $@
|
||||
|
||||
2thread.bin : boot.HOF lib.HOF 2thread.HOF
|
||||
$(HARPLD) -o 2thread.bin $^
|
||||
|
||||
2thread.4b.bin : boot.4b.HOF lib.4b.HOF 2thread.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o 2thread.4b.bin $^
|
||||
|
||||
bubble.bin : boot.HOF lib.HOF bubble.HOF
|
||||
$(HARPLD) -o bubble.bin $^
|
||||
|
||||
bubble.4b.bin : boot.4b.HOF lib.4b.HOF bubble.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o bubble.4b.bin $^
|
||||
|
||||
simple.bin : boot.HOF lib.HOF simple.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
sieve.bin : boot.HOF lib.HOF sieve.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
dotprod.bin : boot.HOF lib.HOF dotprod.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
matmul.bin : boot.HOF lib.HOF matmul.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
matmul-mt.bin : boot.HOF lib.HOF matmul-mt.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
simple.4b.bin : boot.4b.HOF lib.4b.HOF simple.4b.HOF
|
||||
%.4b.bin : boot.4b.HOF lib.4b.HOF %.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
sieve.4b.bin : boot.4b.HOF lib.4b.HOF sieve.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
dotprod.4b.bin : boot.4b.HOF lib.4b.HOF dotprod.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
matmul.4b.bin : boot.4b.HOF lib.4b.HOF matmul.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $^
|
||||
|
||||
%.4b.bin : %.4b.HOF
|
||||
$(HARPLD) --arch $(4BARCH) -o $@ $<
|
||||
|
||||
%.bin : %.HOF
|
||||
$(HARPLD) -o $@ $<
|
||||
%.bin : boot.HOF lib.HOF %.HOF
|
||||
$(HARPLD) -o $@ $^
|
||||
|
||||
%.4b.HOF : %.s
|
||||
$(HARPAS) --arch $(4BARCH) -o $@ $<
|
||||
|
|
64
src/test/diverge.s
Normal file
64
src/test/diverge.s
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*******************************************************************************
|
||||
Harptools by Chad D. Kersey, Summer 2011
|
||||
********************************************************************************
|
||||
|
||||
Sample HARP assembly program.
|
||||
|
||||
*******************************************************************************/
|
||||
/* Divergent branch: test immediate postdominator branch divergence support. */
|
||||
.def THREADS 8
|
||||
|
||||
.align 4096
|
||||
.perm x
|
||||
.entry
|
||||
.global
|
||||
/* Program entry point: sets up THREADS lane contexts with clone, runs dthread
   on them via jalis, then prints one word of array per lane. */
entry:
|
||||
/* %r0 is the clone operand and loop counter, starting at 1. */
ldi %r0, #1
|
||||
/* %r1 holds the lane count THREADS, used as the loop bound. */
ldi %r1, THREADS
|
||||
/* Context-creation loop. NOTE(review): presumably clone copies the current
   registers into the context of lane %r0, so each lane later sees its own
   index in %r0 - confirm against the clone definition. */
sloop: clone %r0
|
||||
|
||||
addi %r0, %r0, #1
|
||||
/* %r2 = THREADS - %r0; presumably rtop sets @p0 when %r2 is non-zero, so the
   loop repeats until %r0 reaches THREADS - TODO confirm rtop semantics. */
sub %r2, %r1, %r0
|
||||
rtop @p0, %r2
|
||||
@p0 ? jmpi sloop
|
||||
|
||||
/* Reset %r0 to 0 before spawning (the clone loop left it at THREADS). */
ldi %r0, #0
|
||||
/* Jump and link to dthread, spawning %r1 (= THREADS) active lanes; the link
   register is %r5, which dthread uses for its jmprt. */
jalis %r5, %r1, dthread;
|
||||
|
||||
/* Print loop: %r0 is a byte offset into array, %r1 the end offset. */
ldi %r0, #0
|
||||
ldi %r1, (__WORD * THREADS)
|
||||
|
||||
/* Load the word at array + %r0 into %r7 and call printdec with link in %r5.
   NOTE(review): printdec is not defined in this file; presumably it comes
   from the library object linked alongside and takes its argument in %r7. */
ploop: ld %r7, %r0, array
|
||||
jali %r5, printdec
|
||||
|
||||
/* Advance by one word and loop while the end offset has not been reached. */
addi %r0, %r0, __WORD
|
||||
sub %r7, %r1, %r0
|
||||
rtop @p0, %r7
|
||||
@p0 ? jmpi ploop
|
||||
|
||||
/* All THREADS words have been printed. NOTE(review): presumably trap ends the
   program here - confirm. */
trap;
|
||||
|
||||
|
||||
/* Per-lane kernel reached through jalis from entry; exercises split/join.
   Each lane runs 10 iterations that add %r0 to an accumulator on one side of
   a divergent branch and subtract it on the other, then stores the result in
   array. NOTE(review): assumes %r0 holds the lane's own index on entry. */
dthread: ldi %r1, #10
|
||||
/* %r2 is the accumulator; %r1 (above) is the iteration counter. */
ldi %r2, #0
|
||||
|
||||
/* @p1 is derived from the low bit of %r0 (presumably true on odd lanes -
   TODO confirm rtop semantics). */
loop: andi %r3, %r0, #1
|
||||
rtop @p1, %r3
|
||||
/* Diverge on @p1: lanes with @p1 true continue first; the others are masked
   out until the join below. */
@p1 ? split
|
||||
/* First pass: every active lane has @p1 true, so the jump to else is taken.
   Second pass (after the first join): every active lane has @p1 false, so
   execution falls through to the add. */
@p1 ? jmpi else
|
||||
add %r2, %r2, %r0
|
||||
jmpi after
|
||||
else: sub %r2, %r2, %r0
|
||||
/* First arrival returns to the instruction after split with the lane mask
   complemented; second arrival falls through with the original mask restored. */
after: join
|
||||
|
||||
/* Count down and repeat while %r1 is non-zero (presumably what @p0 encodes). */
subi %r1, %r1, #1
|
||||
rtop @p0, %r1
|
||||
@p0 ? jmpi loop
|
||||
|
||||
/* Scale %r0 into a byte offset and store the accumulator at array + offset.
   NOTE(review): (`__WORD) is presumably log2 of the word size - confirm. */
shli %r4, %r0, (`__WORD)
|
||||
st %r2, %r4, array
|
||||
|
||||
/* Return through the link register, terminating all but a single lane. */
jmprt %r5;
|
||||
|
||||
.align 4096
|
||||
array: .space 4096
|
Loading…
Add table
Add a link
Reference in a new issue