Fixed emulator

This commit is contained in:
felsabbagh3 2019-11-06 23:30:07 -05:00
parent 60e6ff0b42
commit 87ae5c8cdf
23 changed files with 947 additions and 793 deletions

Binary file not shown.

View file

@ -3,6 +3,7 @@
*******************************************************************************/
#include <iostream>
#include <iomanip>
// #define USE_DEBUG 7
// #define PRINT_ACTIVE_THREADS
@ -36,7 +37,7 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
a(a), iDec(d), mem(mem), steps(0)
{
for (unsigned i = 0; i < a.getNWarps(); ++i)
w.push_back(Warp(this));
w.push_back(Warp(this, i));
w[0].activeThreads = 1;
w[0].spawned = true;
@ -53,6 +54,7 @@ void Core::step() {
cout << endl << "Threads:";
#endif
for (unsigned i = 0; i < w.size(); ++i) {
if (w[i].activeThreads) {
D(3, "Core step stepping warp " << i << '[' << w[i].activeThreads << ']');
@ -93,11 +95,12 @@ void Core::printStats() const {
}
Warp::Warp(Core *c, Word id) :
core(c), pc(0), interruptEnable(true),
core(c), pc(0x80000000), interruptEnable(true),
supervisorMode(true), activeThreads(0), reg(0), pred(0),
shadowReg(core->a.getNRegs()), shadowPReg(core->a.getNPRegs()), id(id),
spawned(false), steps(0), insts(0), loads(0), stores(0)
{
D(3, "Creating a new thread with PC: " << hex << this->pc << '\n');
/* Build the register file. */
Word regNum(0);
for (Word j = 0; j < core->a.getNThds(); ++j) {
@ -111,8 +114,10 @@ Warp::Warp(Core *c, Word id) :
pred[j].push_back(Reg<bool>(id, regNum++));
}
tmask.push_back(true);
shadowTmask.push_back(true);
bool act = false;
if (j == 0) act = true;
tmask.push_back(act);
shadowTmask.push_back(act);
}
Word csrNum(0);
@ -197,9 +202,9 @@ void Warp::step() {
if (USE_DEBUG >= 3) {
D(3, "Register state:");
for (unsigned i = 0; i < reg[0].size(); ++i) {
D_RAW(" %r" << dec << i << ':');
D_RAW(" %r" << setfill(' ') << setw(2) << dec << i << ':');
for (unsigned j = 0; j < reg.size(); ++j)
D_RAW(' ' << hex << reg[j][i] << ' ');
D_RAW(' ' << setfill('0') << setw(8) << hex << reg[j][i] << setfill(' ') << ' ');
D_RAW('(' << shadowReg[i] << ')' << endl);
}
// D(3, "Predicate state:");

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -4,7 +4,7 @@
#ifndef __DEBUG_H
#define __DEBUG_H
// #define USE_DEBUG 9
#define USE_DEBUG 9
#ifdef USE_DEBUG
#include <iostream>

View file

@ -138,18 +138,38 @@ void Instruction::executeOn(Warp &c) {
bool join = (op == GPGPU) && (func3 == 3);
predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2));
// predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2));
// bool is_branch = (op == B_INST);
// bool is_jump = (op == JAL_INST) || (op == JALR_INST);
bool is_gpgpu = (op == GPGPU);
bool is_tmc = is_gpgpu && (func3 == 0);
bool is_wspawn = is_gpgpu && (func3 == 1);
bool is_barrier = is_gpgpu && (func3 == 4);
bool is_split = is_gpgpu && (func3 == 2);
bool is_join = is_gpgpu && (func3 == 3);
bool gpgpu_zero = (is_tmc || is_barrier || is_wspawn) && (t != 0);
bool not_active = !c.tmask[t];
if (not_active || gpgpu_zero)
{
continue;
}
// printf("Predicated: %d, split: %d, join: %d\n",predicated, split, join );
// printf("%d && ((%d) || (%d))\n",(op == GPGPU), (func3 == 7), (func3 == 2) );
// cout << "before " << op << " = " << GPGPU << "\n";
if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join)
{
// cout << "about to continue\n";
continue;
}
// if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join)
// {
// // cout << "about to continue\n";
// continue;
// }
// cout << "after\n";
++c.insts;
@ -163,6 +183,7 @@ void Instruction::executeOn(Warp &c) {
bool m_exten;
// std::cout << "op = " << op << "\n";
// std::cout << "R_INST: " << R_INST << "\n";
int num_to_wspawn;
switch (op) {
case NOP:
@ -462,11 +483,11 @@ void Instruction::executeOn(Warp &c) {
//std::cout << "S_INST\n";
++c.stores;
memAddr = reg[rsrc[0]] + immsrc;
// //std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
// //std::cout << "FUNC3: " << func3 << "\n";
if (memAddr == 0x00010000)
if ((memAddr == 0x00010000) && (t == 0))
{
std::cout << (char) reg[rsrc[1]];
fprintf(stderr, "%c", (char) reg[rsrc[1]]);
break;
}
switch (func3)
@ -558,7 +579,7 @@ void Instruction::executeOn(Warp &c) {
case JAL_INST:
//std::cout << "JAL_INST\n";
if (!pcSet) nextPc = (c.pc - 4) + immsrc;
if (!pcSet) //std::cout << "JAL... SETTING PC: " << nextPc << "\n";
if (!pcSet) {/*std::cout << "JAL... SETTING PC: " << nextPc << "\n"; */}
if (rdest != 0)
{
reg[rdest] = c.pc;
@ -566,9 +587,9 @@ void Instruction::executeOn(Warp &c) {
pcSet = true;
break;
case JALR_INST:
//std::cout << "JALR_INST\n";
std::cout << "JALR_INST\n";
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
if (!pcSet) //std::cout << "JALR... SETTING PC: " << nextPc << "\n";
if (!pcSet) {/*std::cout << "JALR... SETTING PC: " << nextPc << "\n";*/ }
if (rdest != 0)
{
reg[rdest] = c.pc;
@ -578,76 +599,85 @@ void Instruction::executeOn(Warp &c) {
case SYS_INST:
//std::cout << "SYS_INST\n";
temp = reg[rsrc[0]];
switch (func3)
if (immsrc == 0x20) // ThreadID
{
case 1:
// printf("Case 1\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = temp;
break;
case 2:
// printf("Case 2\n");
if (rdest != 0)
{
// printf("Reading from CSR: %d = %d\n", (immsrc & 0x00000FFF), c.csr[immsrc & 0x00000FFF]);
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
// printf("Writing to CSR --> %d = %d\n", immsrc, (temp | c.csr[immsrc & 0x00000FFF]));
c.csr[immsrc & 0x00000FFF] = temp | c.csr[immsrc & 0x00000FFF];
break;
case 3:
// printf("Case 3\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = temp & (~c.csr[immsrc & 0x00000FFF]);
break;
case 5:
// printf("Case 5\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0];
break;
case 6:
// printf("Case 6\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0] | c.csr[immsrc & 0x00000FFF];
break;
case 7:
// printf("Case 7\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0] & (~c.csr[immsrc & 0x00000FFF]);
break;
case 0:
if (immsrc < 2)
{
//std::cout << "INTERRUPT ECALL/EBREAK\n";
nextActiveThreads = 0;
c.spawned = false;
// c.interrupt(0);
}
break;
default:
break;
reg[rdest] = t;
D(2, "CSR Reading tid " << hex << immsrc << dec << " and returning " << reg[rdest]);
} else if (immsrc == 0x21) // WarpID
{
reg[rdest] = c.id;
D(2, "CSR Reading wid " << hex << immsrc << dec << " and returning " << reg[rdest]);
}
// switch (func3)
// {
// case 1:
// // printf("Case 1\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = temp;
// break;
// case 2:
// // printf("Case 2\n");
// if (rdest != 0)
// {
// // printf("Reading from CSR: %d = %d\n", (immsrc & 0x00000FFF), c.csr[immsrc & 0x00000FFF]);
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// // printf("Writing to CSR --> %d = %d\n", immsrc, (temp | c.csr[immsrc & 0x00000FFF]));
// c.csr[immsrc & 0x00000FFF] = temp | c.csr[immsrc & 0x00000FFF];
// break;
// case 3:
// // printf("Case 3\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = temp & (~c.csr[immsrc & 0x00000FFF]);
// break;
// case 5:
// // printf("Case 5\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0];
// break;
// case 6:
// // printf("Case 6\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0] | c.csr[immsrc & 0x00000FFF];
// break;
// case 7:
// // printf("Case 7\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0] & (~c.csr[immsrc & 0x00000FFF]);
// break;
// case 0:
// if (immsrc < 2)
// {
// //std::cout << "INTERRUPT ECALL/EBREAK\n";
// nextActiveThreads = 0;
// c.spawned = false;
// // c.interrupt(0);
// }
// break;
// default:
// break;
// }
break;
case TRAP:
//std::cout << "INTERRUPT TRAP\n";
@ -670,30 +700,44 @@ void Instruction::executeOn(Warp &c) {
//std::cout << "GPGPU\n";
switch(func3)
{
case 0:
case 1:
// WSPAWN
//std::cout << "WSPAWN\n";
std::cout << "WSPAWN\n";
if (sjOnce)
{
sjOnce = false;
D(0, "Spawning a new warp.");
// //std::cout << "SIZE: " << c.core->w.size() << "\n";
for (unsigned i = 0; i < c.core->w.size(); ++i)
num_to_wspawn = reg[rsrc[0]];
D(0, "Spawning " << num_to_wspawn << " new warps at PC: " << hex << reg[rsrc[1]]);
for (unsigned i = 1; i < num_to_wspawn; ++i)
{
// std::cout << "SPAWNING WARP\n";
Warp &newWarp(c.core->w[i]);
// //std::cout << "STARTING\n";
if (newWarp.spawned == false) {
// if (newWarp.spawned == false)
{
// //std::cout << "ABOUT TO START\n";
newWarp.pc = reg[rsrc[0]];
newWarp.reg[0] = reg;
newWarp.csr = c.csr;
newWarp.pc = reg[rsrc[1]];
// newWarp.reg[0] = reg;
// newWarp.csr = c.csr;
for (int kk = 0; kk < newWarp.tmask.size(); kk++)
{
if (kk == 0)
{
newWarp.tmask[kk] = true;
}
else
{
newWarp.tmask[kk] = false;
}
}
newWarp.activeThreads = 1;
newWarp.supervisorMode = false;
newWarp.spawned = true;
break;
}
}
break;
}
break;
case 2:
@ -704,12 +748,16 @@ void Instruction::executeOn(Warp &c) {
{
sjOnce = false;
if (checkUnanimous(pred, c.reg, c.tmask)) {
//std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
DomStackEntry e(c.tmask);
e.uni = true;
c.domStack.push(e);
break;
}
cout << "Split: Original TM: ";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
c.domStack.push(c.tmask);
c.domStack.push(e);
@ -717,49 +765,79 @@ void Instruction::executeOn(Warp &c) {
{
c.tmask[i] = !e.tmask[i] && c.tmask[i];
}
cout << "Split: New TM\n";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
cout << "Split: Pushed TM PC: " << hex << e.pc << dec << "\n";
for (auto y : e.tmask) cout << y << " ";
cout << "\n";
}
}
break;
}
case 3:
// JOIN
//std::cout << "JOIN\n";
D(3, "JOIN INSTRUCTION");
if (sjOnce)
{
sjOnce = false;
if (!c.domStack.empty() && c.domStack.top().uni) {
D(2, "Uni branch at join");
printf("NEW DOMESTACK: \n");
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
break;
}
if (!c.domStack.top().fallThrough) {
if (!pcSet) nextPc = c.domStack.top().pc;
if (!pcSet) {
nextPc = c.domStack.top().pc;
cout << "join: NOT FALLTHROUGH PC: " << hex << nextPc << dec << '\n';
}
pcSet = true;
}
cout << "Join: Old TM: ";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
c.tmask = c.domStack.top().tmask;
cout << "Join: New TM: " << '\n';
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
c.domStack.pop();
}
break;
case 4:
// JMPRT
//std::cout << "JMPRT\n";
nextActiveThreads = 1;
if (!pcSet) nextPc = reg[rsrc[0]];
pcSet = true;
// is_barrier
break;
case 5:
// CLONE
//std::cout << "CLONE\n";
// //std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
c.reg[reg[rsrc[0]]] = reg;
break;
case 6:
// JALRS
case 0:
// TMC
//std::cout << "JALRS\n";
nextActiveThreads = reg[rsrc[1]];
reg[rdest] = c.pc;
if (!pcSet) nextPc = reg[rsrc[0]];
pcSet = true;
nextActiveThreads = reg[rsrc[0]];
{
for (int ff = 0; ff < c.tmask.size(); ff++)
{
if (ff < nextActiveThreads)
{
c.tmask[ff] = true;
}
else
{
c.tmask[ff] = false;
}
}
}
if (nextActiveThreads == 0)
{
c.spawned = false;
}
// reg[rdest] = c.pc;
// if (!pcSet) nextPc = reg[rsrc[0]];
// pcSet = true;
// //std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
// //std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
break;
@ -794,7 +872,11 @@ void Instruction::executeOn(Warp &c) {
// This way, if pc was set by a side effect (such as interrupt), it will
// retain its new value.
if (pcSet) c.pc = nextPc;
if (pcSet)
{
c.pc = nextPc;
cout << "Next PC: " << hex << nextPc << dec << "\n";
}
if (nextActiveThreads > c.reg.size()) {
cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. "

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -102,10 +102,10 @@ void MemoryUnit::ADecoder::write(Addr a, Word w, bool sup, Size wordSize) {
Word before = m.read(a);
Word new_word = w;
if (a == 0x8000012c)
{
printf("WRITING TO 0x8000012c -> %d\n", w);
}
// if (a == 0x00010000)
// {
// fprintf(stderr, "%c", w);
// }
if (wordSize == 8)
{

Binary file not shown.

View file

@ -1,4 +1,4 @@
echo start > results.txt
echo ../kernel/vortex_test.hex
./harptool -E -a rv32i --core ../kernel/vortex_test.hex -s -b
./harptool -E -a rv32i --core ../runtime/vortex_runtime.hex -s -b 1> emulator.debug

Binary file not shown.

View file

@ -69,7 +69,7 @@ int main()
// unsigned scal = 3;
// // matrix element add
// vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Element Addition\n");
// print_matrix(z);

View file

@ -19,7 +19,7 @@ vx_set_sp:
.word 0x0005006b # tmc 4
csrr a3, 0x21 # get wid
slli a3, a3, 15 # shift wid left by 15 bits
slli a3, a3, 0x1a # shift wid left by 26 (0x1a) bits
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4

View file

@ -106,14 +106,14 @@ void test_wsapwn()
void intrinsics_tests()
{
// // TMC test
// test_tmc();
// TMC test
test_tmc();
// // Control Divergence Test
// vx_print_str("test_divergence\n");
// vx_tmc(4);
// test_divergence();
// vx_tmc(1);
// Control Divergence Test
vx_print_str("test_divergence\n");
vx_tmc(4);
test_divergence();
vx_tmc(1);
// Test wspawn

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -1,7 +1,7 @@
:0200000480007A
:100000009705000093850502130540006B10B500AD
:10001000EF000001EF000072130500006B00050007
:10002000130540006B000500F32610029396F600BE
:10001000EF000001EF008074130500006B00050085
:10002000130540006B000500F32610029396A6010D
:10003000732600029315A6001316260037F1FF6FF2
:100040003301B1403301D1403301C100F326100226
:1000500063860600130500006B0005006780000042
@ -18,7 +18,7 @@
:100100001301C10067800000B702010023A0B20004
:1001100067800000130101FE232E1100232C8100B3
:10012000130401022326A4FEB70700810327C4FE9F
:10013000131727009387470FB307F70083A707001C
:10013000131727009387C70FB307F70083A707009C
:1001400013850700EFF05FF9130000008320C10161
:10015000032481011301010267800000130101FEE5
:10016000232E1100232C8100130401022326A4FE58
@ -26,100 +26,115 @@
:10018000EFF05FF9B707008113850704EFF0DFF4A4
:10019000130000008320C101032481011301010227
:1001A00067800000130101FE232E1100232C810023
:1001B00013040102B707008183A7C7231385070033
:1001C000EFF09FEAB707008103A78723B7070081F5
:1001D00083A7472313850700E7000700EFF0DFEA56
:1001B00013040102B707008183A7472413850700B2
:1001C000EFF09FEAB707008103A70724B707008174
:1001D00083A7C72313850700E7000700EFF0DFEAD6
:1001E0002326A4FE8327C4FE6388070013050000AE
:1001F000EFF09FE76F00C00013051000EFF0DFE69F
:10020000130000008320C1010324810113010102B6
:1002100067800000130101FE232E1100232C8100B2
:10022000130401022326A4FE2324B4FE2322C4FEC9
:100230002320D4FEB7070081032744FE23ACE72226
:10024000B7070081032704FE23AAE722B70700812E
:10025000032784FE23AEE722832744FE938507000D
:100260000325C4FEEFF0DFDFEFF0DFF31300000043
:100270008320C10103248101130101026780000072
:10028000130101FE232E1100232C8100130401020F
:10029000B707008113854708EFF01FE413054000FE
:1002A000EFF09FDCEFF0DFDE2326A4FE0327C4FE81
:1002B000B70700818326C4FE939626009387072400
:1002C000B387F60023A0E70013051000EFF0DFD995
:1002D000B707008183A7072413850700EFF09FE38A
:1002E000B707008113850709EFF01FDFB70700810B
:1002F0009387072483A7470013850700EFF09FE14A
:10030000B707008113850709EFF01FDDB7070081EC
:100310009387072483A7870013850700EFF09FDFEB
:10032000B707008113850709EFF01FDBB7070081CE
:100330009387072483A7C70013850700EFF09FDD8D
:10034000B707008113850709EFF01FD913000000DC
:100350008320C10103248101130101026780000091
:10036000130101FE232E1100232C8100130401022E
:10037000EFF01FD22326A4FE8327C4FE93B72700E5
:10038000A305F4FE8347B4FE13850700EFF0DFCE2C
:100390008347B4FE638407068327C4FE93B7170020
:1003A0002305F4FE8347A4FE13850700EFF0DFCC9E
:1003B0008347A4FE63820702B70700810327C4FEB8
:1003C0001317270093870724B307F7001307A0002C
:1003D00023A0E7006F000002B70700810327C4FED7
:1003E0001317270093870724B307F7001307B000FC
:1003F00023A0E700EFF0DFC86F0040068327C4FEAC
:1004000093B73700A304F4FE834794FE13850700D7
:10041000EFF09FC6834794FE63820702B70700810F
:100420000327C4FE1317270093870724B307F70099
:100430001307C00023A0E7006F000002B707008188
:100440000327C4FE1317270093870724B307F70079
:100450001307D00023A0E700EFF09FC2EFF05FC2C8
:10046000B707008183A7072413850700EFF09FCA11
:10047000B707008113850709EFF01FC6B707008192
:100480009387072483A7470013850700EFF09FC8D1
:10049000B707008113850709EFF01FC4B707008174
:1004A0009387072483A7870013850700EFF09FC673
:1004B000B707008113850709EFF01FC2B707008156
:1004C0009387072483A7C70013850700EFF09FC415
:1004D000B707008113850709EFF01FC01300000064
:1004E0008320C10103248101130101026780000000
:1004F000130101FE232E1100232C8100130401029D
:10050000EFF09FB82326A4FEB70700810327C4FE9F
:100510001317270093870725B307F7000327C4FEA7
:1005200023A0E7008327C4FE6386070013050000AD
:10053000EFF09FB3130000008320C1010324810169
:100540001301010267800000130101FE232E110038
:10055000232C810013040102B70700809387074F03
:100560002326F4FE8325C4FE13054000EFF05FAFA1
:10057000EFF01FF8B707008183A707251385070051
:10058000EFF05FB9B707008113850709EFF0DFB41B
:10059000B70700819387072583A7470013850700C6
:1005A000EFF05FB7B707008113850709EFF0DFB2FF
:1005B000B70700819387072583A787001385070066
:1005C000EFF05FB5B707008113850709EFF0DFB0E3
:1005D000B70700819387072583A7C7001385070006
:1005E000EFF05FB3B707008113850709EFF0DFAEC7
:1005F000130000008320C1010324810113010102C3
:1006000067800000130101FF2326110023248100CD
:1006100013040101B707008113854709EFF0DFAB31
:10062000EFF09FF2130000008320C100032481003B
:100630001301010167800000130101FC232E110248
:10064000232C8102130401042326A4FC8327C4FC69
:100650002326F4FEEFF05FA32324A4FEEFF05FA3B4
:100660002322A4FE8327C4FE83A70701032784FE59
:10067000637EF7008327C4FE83A7C700032744FED9
:100680006376F700930710006F0080009307000067
:10069000A301F4FE834734FE93F71700A301F4FE91
:1006A000834734FE13850700EFF01F9D834734FE18
:1006B000638607068327C4FE03A7C700832784FE3B
:1006C000B307F702032744FEB307F700232EF4FC19
:1006D0008327C4FE03A707008327C4FD9397270041
:1006E000B307F70083A607008327C4FE03A74700CC
:1006F0008327C4FD93972700B307F70003A70700DC
:100700008327C4FE03A687008327C4FD9397270091
:10071000B307F6003387E60023A0E700EFF05F960B
:10072000130000008320C10303248103130101048B
:1007300067800000130101FF23261100232481009C
:100740001304010113051000EFF01F92EFF05FB3E7
:10075000B70700811385070EEFF01F9813054000BF
:10076000EFF09F90EFF0DFBF13051000EFF0DF8F89
:1007700093070000138507008320C1000324810034
:08078000130101016780000074
:100230002320D4FEB7070081032744FE23A0E72430
:10024000B7070081032704FE23AEE722B70700812A
:10025000032784FE23A2E724B70700809387471A69
:10026000938507000325C4FEEFF09FDFEFF09FF3B7
:10027000130000008320C101032481011301010246
:1002800067800000130101FE232E1100232C810042
:1002900013040102B707008113854708EFF0DFE37D
:1002A00013054000EFF05FDCEFF09FDE2326A4FE95
:1002B0000327C4FEB70700818326C4FE9396260059
:1002C00093878724B387F60023A0E7001305100067
:1002D000EFF09FD9B707008183A787241385070014
:1002E000EFF05FE3B707008113850709EFF0DFDE6A
:1002F000B70700819387872483A7470013850700EA
:10030000EFF05FE1B707008113850709EFF0DFDC4D
:10031000B70700819387872483A787001385070089
:10032000EFF05FDFB707008113850709EFF0DFDA31
:10033000B70700819387872483A7C7001385070029
:10034000EFF05FDDB707008113850709EFF0DFD815
:10035000130000008320C101032481011301010265
:1003600067800000130101FE232E1100232C810061
:1003700013040102EFF0DFD12326A4FE8327C4FE7D
:1003800093B72700A305F4FE8347B4FE1385070047
:10039000EFF09FCE8347B4FE638407068327C4FE35
:1003A00093B717002305F4FE8347A4FE13850700C7
:1003B000EFF09FCC8347A4FE63820702B70700815A
:1003C0000327C4FE1317270093878724B307F7007A
:1003D0001307A00023A0E7006F000002B707008109
:1003E0000327C4FE1317270093878724B307F7005A
:1003F0001307B00023A0E700EFF09FC86F0040068E
:100400008327C4FE93B73700A304F4FE834794FE0A
:1004100013850700EFF05FC6834794FE63820702EF
:10042000B70700810327C4FE13172700938787248B
:10043000B307F7001307C00023A0E7006F00000216
:10044000B70700810327C4FE13172700938787246B
:10045000B307F7001307D00023A0E700EFF05FC257
:10046000EFF01FC2B707008183A787241385070019
:10047000EFF05FCAB707008113850709EFF0DFC50A
:10048000B70700819387872483A747001385070058
:10049000EFF05FC8B707008113850709EFF0DFC3EE
:1004A000B70700819387872483A7870013850700F8
:1004B000EFF05FC6B707008113850709EFF0DFC1D2
:1004C000B70700819387872483A7C7001385070098
:1004D000EFF05FC4B707008113850709EFF0DFBFB6
:1004E000130000008320C1010324810113010102D4
:1004F00067800000130101FE232E1100232C8100D0
:1005000013040102EFF05FB82326A4FEB7070081B1
:100510000327C4FE1317270093878725B307F70027
:100520000327C4FE23A0E7008327C4FE63860700D9
:1005300013050000EFF05FB3130000008320C1013A
:10054000032481011301010267800000130101FEF1
:10055000232E1100232C810013040102B707008011
:100560009387474F2326F4FE8325C4FE13054000DE
:10057000EFF01FAFEFF01FF8B707008183A78725C3
:1005800013850700EFF01FB9B7070081138507092E
:10059000EFF09FB4B70700819387872583A74700B3
:1005A00013850700EFF01FB7B70700811385070910
:1005B000EFF09FB2B70700819387872583A7870055
:1005C00013850700EFF01FB5B707008113850709F2
:1005D000EFF09FB0B70700819387872583A7C700F7
:1005E00013850700EFF01FB3B707008113850709D4
:1005F000EFF09FAE130000008320C10103248101AE
:100600001301010267800000130101FF232611007E
:100610002324810013040101EFF0DFC6B707008136
:1006200013854709EFF05FAB13054000EFF0DFA340
:10063000EFF05FD313051000EFF01FA3B7070081A1
:100640001385870AEFF05FA9EFF05FF01300000059
:100650008320C10003248100130101016780000091
:10066000130101FC232E1102232C81021304010427
:100670002326A4FC8327C4FC2326F4FEEFF0DFA08E
:100680002324A4FEEFF0DFA02322A4FE8327C4FED0
:1006900083A70701032784FE637EF7008327C4FE38
:1006A00083A7C700032744FE6376F7009307100073
:1006B0006F00800093070000A301F4FE834734FE1F
:1006C00093F71700A301F4FE834734FE1385070058
:1006D000EFF09F9A834734FE638607068327C4FEA4
:1006E00003A7C700832784FEB307F702032744FE4E
:1006F000B307F700232EF4FC8327C4FE03A70700EB
:100700008327C4FD93972700B307F70083A607004C
:100710008327C4FE03A747008327C4FD93972700C0
:10072000B307F70003A707008327C4FE03A68700CB
:100730008327C4FD93972700B307F6003387E600AD
:1007400023A0E700EFF0DF93130000008320C10334
:10075000032481031301010467800000130101FCDD
:10076000232E1102232C810213040104130510000F
:10077000EFF09F8FB70700819387C71B2324F4FCFA
:10078000B70700819387C71F2326F4FCB7070081B2
:10079000938787262328F4FC93074000232AF4FC40
:1007A00093074000232CF4FC930740002322F4FE1F
:1007B000930740002320F4FE032744FE832504FE14
:1007C000930784FC93860700B707008013860766AB
:1007D00013050700EFF01FA4232604FE6F00000896
:1007E000232404FE6F004005032744FD8327C4FE35
:1007F0003307F702832784FEB307F700232EF4FCA8
:10080000B70700810327C4FD1317270093878726A6
:10081000B307F70083A7070013850700EFF09F8F4A
:10082000B70700811385470FEFF01F8B832784FEE6
:10083000938717002324F4FE032744FD832784FEB7
:10084000E3E4E7FAB70700811385870FEFF0DF884D
:100850008327C4FE938717002326F4FE032784FD15
:100860008327C4FEE3EEE7F6930700001385070035
:100870008320C10303248103130101046780000066
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
@ -130,32 +145,32 @@
:100060003700000038000000390000006100000087
:1000700062000000630000006400000065000000F2
:1000800066000000746573745F746D630A0000009D
:100090000A000000746573745F737061776E0A0004
:1000A000300000003100000032000000330000008A
:1000B000340000003500000036000000370000006A
:1000C00038000000390000006100000062000000FC
:1000D000630000006400000065000000660000008E
:1000E000746573745F646976657267656E63650ACB
:0100F000000F
:1000F4000000008104000081080000810C000081E0
:100104001000008114000081180000811C0000818F
:100114002000008124000081280000812C0000813F
:100124003000008134000081380000813C000081EF
:1001340044000081480000814C000081500000818F
:1001440054000081580000815C000081600000813F
:1001540064000081680000816C00008170000081EF
:1001640074000081780000817C000081800000819F
:10017400A0000081A4000081A8000081AC000081DF
:10018400B0000081B4000081B8000081BC0000818F
:10019400C0000081C4000081C8000081CC0000813F
:1001A400D0000081D4000081D8000081DC000081EF
:1001B4000100000001000000010000000100000037
:1001C4000100000001000000010000000100000027
:1001D4000100000001000000010000000100000017
:1001E4000100000001000000010000000100000007
:1001F40006000000060000000600000006000000E3
:1002040006000000060000000600000006000000D2
:1002140006000000060000000600000006000000C2
:1002240006000000060000000600000006000000B2
:100090000A000000746573745F6469766572676551
:1000A0006E63650A00000000746573745F737061AD
:1000B000776E0A00300000003100000032000000BE
:1000C000330000003400000035000000360000005E
:1000D0003700000038000000390000006100000017
:1000E0006200000063000000640000006500000082
:0A00F00066000000200000000A0076
:1000FC000000008104000081080000810C000081D8
:10010C001000008114000081180000811C00008187
:10011C002000008124000081280000812C00008137
:10012C003000008134000081380000813C000081E7
:10013C0044000081480000814C0000815000008187
:10014C0054000081580000815C0000816000008137
:10015C0064000081680000816C00008170000081E7
:10016C0074000081780000817C0000818000008197
:10017C00B4000081B8000081BC000081C000008187
:10018C00C4000081C8000081CC000081D000008137
:10019C00D4000081D8000081DC000081E0000081E7
:1001AC00E4000081E8000081EC000081F000008197
:1001BC00050000000500000005000000050000001F
:1001CC00060000000600000006000000060000000B
:1001DC0007000000070000000700000007000000F7
:1001EC0008000000080000000800000008000000E3
:1001FC0001000000010000000100000001000000EF
:10020C0001000000010000000100000001000000DE
:10021C0001000000010000000100000001000000CE
:10022C0001000000010000000100000001000000BE
:040000058000000077
:00000001FF

View file

@ -31,7 +31,7 @@ void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void
global_function_pointer = func_ptr;
global_argument_struct = args;
global_num_threads = numThreads;
vx_wspawn(numWarps, (unsigned) func_ptr);
vx_wspawn(numWarps, (unsigned) setup_call);
setup_call();
}

View file

@ -14,16 +14,16 @@ typedef struct
} mat_add_args_t;
unsigned x[] = {1, 1, 1, 1,
unsigned x[] = {5, 5, 5, 5,
6, 6, 6, 6,
7, 7, 7, 7,
8, 8, 8, 8};
unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1};
unsigned y[] = {6, 6, 6, 6,
6, 6, 6, 6,
6, 6, 6, 6,
6, 6, 6, 6};
unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
@ -51,48 +51,38 @@ int main()
{
// main is called with all threads of warp 0 active
vx_tmc(1);
///////////////////////////////////////////////////////////////////////
// mat_add_args_t arguments;
// arguments.x = x;
// arguments.y = y;
// arguments.z = z;
// arguments.numColums = 4;
// arguments.numRows = 4;
// int numWarps = 4;
// int numThreads = 4;
// vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
///////////////////////////////////////////////////////////////////////
/*
NOTE: * When test_wsapwn is called from intrinsics_tests, RA 0x80000458 is stored at address 0x6fffefbc,
but when it is read back it reads as zero, even though no other write request is made to that
address (when only test_wsapwn is called by itself).
mat_add_args_t arguments;
arguments.x = x;
arguments.y = y;
arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
* When test_wsapwn is called by itself from main, newlines are not printed.
* When test_wsapwn is called together with other tests from main, it works fine.
*/
int numWarps = 4;
int numThreads = 4;
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
for (int i = 0; i < arguments.numRows; i++)
{
for (int j = 0; j < arguments.numColums; j++)
{
unsigned index = (i * arguments.numColums) + j;
vx_print_hex(z[index]);
vx_print_str(" ");
}
vx_print_str("\n");
}
///////////////////////////////////////////////////////////////////////
// intrinsics_tests();
///////////////////////////////////////////////////////////////////////
test_tmc();
// Control Divergence Test
vx_print_str("test_divergence\n");
vx_tmc(4);
test_divergence();
vx_tmc(1);
// // Test wspawn
// vx_print_str("test_wspawn\n");
// test_wsapwn();
return 0;
}