Warp Scheduling + Control Divergence working and stable

This commit is contained in:
felsabbagh3 2019-02-22 07:00:35 -05:00
parent 087a39ccf4
commit af4303a4ca
13 changed files with 298246 additions and 496 deletions

View file

@ -203,20 +203,22 @@ void Warp::step() {
D_RAW(' ' << hex << reg[j][i] << ' ');
D_RAW('(' << shadowReg[i] << ')' << endl);
}
D(3, "Predicate state:");
D_RAW(" ");
for (unsigned j = 0; j < pred.size(); ++j) {
for (unsigned i = 0; i < pred[j].size(); ++i) D_RAW(pred[j][i]);
D_RAW(endl);
}
D_RAW(" (");
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
D_RAW(')' << endl);
// D(3, "Predicate state:");
// D_RAW(" ");
// for (unsigned j = 0; j < pred.size(); ++j) {
// for (unsigned i = 0; i < pred[j].size(); ++i) D_RAW(pred[j][i]);
// D_RAW(endl);
// }
// D_RAW(" (");
// for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
// D_RAW(')' << endl);
D(3, "Thread mask:");
D_RAW(" ");
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
D_RAW(endl);
D_RAW(endl);
D_RAW(endl);
}
#endif

View file

@ -57,8 +57,13 @@ namespace Harp {
std::vector<bool> &tm, Word pc
): pc(pc), fallThrough(false), uni(false)
{
std::cout << "DomStackEntry TMASK: ";
for (unsigned i = 0; i < m.size(); ++i)
{
std::cout << " " << (!bool(m[i][p]) && tm[i]);
tmask.push_back(!bool(m[i][p]) && tm[i]);
}
std::cout << "\n";
}
DomStackEntry(const std::vector<bool> &tmask):

View file

@ -4,7 +4,7 @@
#ifndef __DEBUG_H
#define __DEBUG_H
// #define USE_DEBUG 9
#define USE_DEBUG 9
#ifdef USE_DEBUG
#include <iostream>

View file

@ -28,6 +28,7 @@ namespace Harp {
SYS_INST = 115,
TRAP = 0x7f,
FENCE = 0x0f,
PJ_INST = 0x7b,
GPGPU = 0x6b
};
@ -56,6 +57,7 @@ namespace Harp {
{Opcode::SYS_INST, {"SYS" , true , false, false, false, InstType::I_TYPE }},
{Opcode::TRAP, {"TRAP" , true , false, false, false, InstType::I_TYPE }},
{Opcode::FENCE, {"fence" , true , false, false, false, InstType::I_TYPE }},
{Opcode::PJ_INST, {"pred j", true , false, false, false, InstType::R_TYPE }},
{Opcode::GPGPU, {"gpgpu" , false, false, false, false, InstType::R_TYPE }}
};

View file

@ -47,7 +47,7 @@ ostream &Harp::operator<<(ostream& os, Instruction &inst) {
return os;
}
bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<bool> > >& m,
bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<Word> > >& m,
const std::vector<bool> &tm) {
bool same;
unsigned i;
@ -57,15 +57,19 @@ bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<bool> > >& m,
break;
}
}
if (i == m.size())
throw DivergentBranchException();
if (i == m.size()) throw DivergentBranchException();
std::cout << "same: " << same << " with -> ";
for (; i < m.size(); ++i) {
if (tm[i]) {
std::cout << " " << (bool(m[i][p]));
if (same != (bool(m[i][p]))) {
std::cout << " FALSE\n";
return false;
}
}
}
std::cout << " TRUE\n";
return true;
}
@ -122,11 +126,20 @@ void Instruction::executeOn(Warp &c) {
vector<Reg<bool> > &pReg(c.pred[t]);
stack<DomStackEntry> &domStack(c.domStack);
std::cout << std::hex << "opcode: " << op << " func3: " << func3 << "\n";
if (op == GPGPU) std::cout << "OPCODE MATCHED GPGPU\n";
// If this thread is masked out, don't execute the instruction, unless it's
// a split or join.
// if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
// op != SPLIT && op != JOIN) continue;
predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2));
bool split = (op == GPGPU) && (func3 == 2);
bool join = (op == GPGPU) && (func3 == 3);
if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join) continue;
++c.insts;
Word memAddr;
@ -134,13 +147,14 @@ void Instruction::executeOn(Warp &c) {
Word shamt;
Word temp;
Word data_read;
// Word pred;
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
int op1, op2;
switch (op) {
case NOP: break;
case NOP:
std::cout << "NOP_INST\n";
break;
case R_INST:
std::cout << "R_INST\n";
switch (func3)
{
case 0:
@ -207,7 +221,7 @@ void Instruction::executeOn(Warp &c) {
break;
case L_INST:
std::cout << "L_INST\n";
memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC);
shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8;
data_read = c.core->mem.read(memAddr, c.supervisorMode);
@ -245,6 +259,7 @@ void Instruction::executeOn(Warp &c) {
}
break;
case I_INST:
std::cout << "I_INST\n";
switch (func3)
{
@ -330,6 +345,7 @@ void Instruction::executeOn(Warp &c) {
}
break;
case S_INST:
std::cout << "S_INST\n";
++c.stores;
memAddr = reg[rsrc[0]] + immsrc;
// std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
@ -359,6 +375,7 @@ void Instruction::executeOn(Warp &c) {
#endif
break;
case B_INST:
std::cout << "B_INST\n";
switch (func3)
{
case 0:
@ -412,13 +429,17 @@ void Instruction::executeOn(Warp &c) {
}
break;
case LUI_INST:
std::cout << "LUI_INST\n";
reg[rdest] = (immsrc << 12) & 0xfffff000;
break;
case AUIPC_INST:
std::cout << "AUIPC_INST\n";
reg[rdest] = ((immsrc << 12) & 0xfffff000) + (c.pc - 4);
break;
case JAL_INST:
std::cout << "JAL_INST\n";
if (!pcSet) nextPc = (c.pc - 4) + immsrc;
if (!pcSet) std::cout << "JAL... SETTING PC: " << nextPc << "\n";
if (rdest != 0)
{
reg[rdest] = c.pc;
@ -426,8 +447,9 @@ void Instruction::executeOn(Warp &c) {
pcSet = true;
break;
case JALR_INST:
std::cout << "JALR_INST\n";
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
if (!pcSet) std::cout << "JALR... SETTING PC: " << nextPc << "\n";
if (rdest != 0)
{
reg[rdest] = c.pc;
@ -435,6 +457,7 @@ void Instruction::executeOn(Warp &c) {
pcSet = true;
break;
case SYS_INST:
std::cout << "SYS_INST\n";
temp = reg[rsrc[0]];
switch (func3)
{
@ -505,65 +528,107 @@ void Instruction::executeOn(Warp &c) {
c.interrupt(0);
break;
case FENCE:
std::cout << "FENCE_INST\n";
break;
case PJ_INST:
// pred jump reg
std::cout << "pred jump... src: " << rsrc[0] << std::hex << " val: " << reg[rsrc[0]] << " dest: " << reg[rsrc[1]] << "\n";
if (reg[rsrc[0]])
{
if (!pcSet) nextPc = reg[rsrc[1]];
pcSet = true;
}
break;
case GPGPU:
std::cout << "GPGPU\n";
switch(func3)
{
case 0:
// WSPAWN
D(0, "Spawning a new warp.");
// std::cout << "SIZE: " << c.core->w.size() << "\n";
for (unsigned i = 0; i < c.core->w.size(); ++i)
std::cout << "WSPAWN\n";
if (sjOnce)
{
// std::cout << "WHATTT\n";
Warp &newWarp(c.core->w[i]);
// std::cout << "STARTING\n";
if (newWarp.spawned == false) {
// std::cout << "ABOUT TO START\n";
newWarp.pc = reg[rsrc[0]];
newWarp.reg[0] = reg;
newWarp.csr = c.csr;
newWarp.activeThreads = 1;
newWarp.supervisorMode = false;
newWarp.spawned = true;
break;
sjOnce = false;
D(0, "Spawning a new warp.");
// std::cout << "SIZE: " << c.core->w.size() << "\n";
for (unsigned i = 0; i < c.core->w.size(); ++i)
{
// std::cout << "WHATTT\n";
Warp &newWarp(c.core->w[i]);
// std::cout << "STARTING\n";
if (newWarp.spawned == false) {
// std::cout << "ABOUT TO START\n";
newWarp.pc = reg[rsrc[0]];
newWarp.reg[0] = reg;
newWarp.csr = c.csr;
newWarp.activeThreads = 1;
newWarp.supervisorMode = false;
newWarp.spawned = true;
break;
}
}
}
break;
case 2:
{
// SPLIT
c.domStack.push(c.tmask);
c.domStack.push(e);
for (unsigned i = 0; i < e.tmask.size(); ++i)
std::cout << "SPLIT\n";
if (sjOnce)
{
c.tmask[i] = !e.tmask[i] && c.tmask[i];
sjOnce = false;
if (checkUnanimous(pred, c.reg, c.tmask)) {
std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
DomStackEntry e(c.tmask);
e.uni = true;
c.domStack.push(e);
break;
}
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
c.domStack.push(c.tmask);
c.domStack.push(e);
for (unsigned i = 0; i < e.tmask.size(); ++i)
{
c.tmask[i] = !e.tmask[i] && c.tmask[i];
}
}
}
break;
case 3:
// JOIN
if (!c.domStack.top().fallThrough) {
if (!pcSet) nextPc = c.domStack.top().pc;
pcSet = true;
std::cout << "JOIN\n";
if (sjOnce)
{
sjOnce = false;
if (!c.domStack.empty() && c.domStack.top().uni) {
D(2, "Uni branch at join");
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
break;
}
if (!c.domStack.top().fallThrough) {
if (!pcSet) nextPc = c.domStack.top().pc;
pcSet = true;
}
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
}
c.tmask = c.domStack.top().tmask;
c.domStack.pop();
break;
case 4:
// JMPRT
std::cout << "JMPRT\n";
nextActiveThreads = 1;
if (!pcSet) nextPc = reg[rsrc[0]];
pcSet = true;
break;
case 5:
// CLONE
// std::cout << "CLONE\n";
std::cout << "CLONE\n";
// std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
c.reg[reg[rsrc[0]]] = reg;
break;
case 6:
// JALRS
std::cout << "JALRS\n";
nextActiveThreads = reg[rsrc[1]];
reg[rdest] = c.pc;
if (!pcSet) nextPc = reg[rsrc[0]];
@ -571,14 +636,6 @@ void Instruction::executeOn(Warp &c) {
// std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
// std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
break;
case 7:
// pred jump reg
if (reg[rsrc[0]])
{
nextPc = reg[rsrc[1]];
pcSet = true;
}
break;
default:
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
}
@ -593,6 +650,20 @@ void Instruction::executeOn(Warp &c) {
c.activeThreads = nextActiveThreads;
// if (nextActiveThreads != 0)
// {
// for (int i = 7; i >= c.activeThreads; i--)
// {
// c.tmask[i] = c.tmask[i] && false;
// }
// }
// std::cout << "new thread mask: ";
// for (int i = 0; i < c.tmask.size(); ++i) std::cout << " " << c.tmask[i];
// std::cout << "\n";
// This way, if pc was set by a side effect (such as interrupt), it will
// retain its new value.
if (pcSet) c.pc = nextPc;

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
// #include <stdint.h>
// #include <stdbool.h>
#include <stdbool.h>
// #include <cstdint>
@ -21,8 +21,8 @@ unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#define NUM_WARPS 16
#define NUM_THREADS 1
#define NUM_WARPS 3
#define NUM_THREADS 7
int main()
{
@ -44,20 +44,13 @@ void matAddition(unsigned tid, unsigned wid)
unsigned i = (wid * NUM_THREADS) + tid;
// int cond = i < 16;
// __if(cond)
__if((i < 10))
z_ptr[i] = x_ptr[i] + y_ptr[i];
__else
__end_if
// // DO SOMETHING
// __else
// // DO SOMETHING ELSE
// __end_if
z_ptr[i] = x_ptr[i] + y_ptr[i];
sleep((100 * wid)+100);
sleep((50 * (wid + wid))+100);
return;

View file

@ -6,7 +6,7 @@ Disassembly of section .text:
80000000 <_start>:
80000000: 7ffff137 lui sp,0x7ffff
80000004: 18c000ef jal ra,80000190 <queue_initialize>
80000004: 1d4000ef jal ra,800001d8 <queue_initialize>
80000008: 068000ef jal ra,80000070 <main>
8000000c: 00000073 ecall
@ -39,7 +39,7 @@ Disassembly of section .text:
8000004c: 00038d93 mv s11,t2
80000050: 01bfe0eb 0x1bfe0eb
80000054: 00000517 auipc a0,0x0
80000058: 2a850513 addi a0,a0,680 # 800002fc <reschedule_warps>
80000058: 2f050513 addi a0,a0,752 # 80000344 <reschedule_warps>
8000005c: 0005406b 0x5406b
80000060 <wspawn>:
@ -61,11 +61,11 @@ Disassembly of section .text:
80000094: 5cc68693 addi a3,a3,1484 # 810005cc <y+0xffffffc0>
80000098: 80000637 lui a2,0x80000
8000009c: 0d460613 addi a2,a2,212 # 800000d4 <y+0xfefffac8>
800000a0: 00100593 li a1,1
800000a4: 01000513 li a0,16
800000a8: 390000ef jal ra,80000438 <createWarps>
800000a0: 00700593 li a1,7
800000a4: 00300513 li a0,3
800000a8: 3d8000ef jal ra,80000480 <createWarps>
800000ac: 00000013 nop
800000b0: 214000ef jal ra,800002c4 <queue_isEmpty>
800000b0: 25c000ef jal ra,8000030c <queue_isEmpty>
800000b4: 00050793 mv a5,a0
800000b8: fe078ce3 beqz a5,800000b0 <main+0x40>
800000bc: 00000793 li a5,0
@ -76,334 +76,352 @@ Disassembly of section .text:
800000d0: 00008067 ret
800000d4 <matAddition>:
800000d4: fd010113 addi sp,sp,-48
800000d8: 02112623 sw ra,44(sp)
800000dc: 02812423 sw s0,40(sp)
800000e0: 03010413 addi s0,sp,48
800000e4: fca42e23 sw a0,-36(s0)
800000e8: fcb42c23 sw a1,-40(s0)
800000ec: 408000ef jal ra,800004f4 <get_1st_arg>
800000d4: fc010113 addi sp,sp,-64
800000d8: 02112e23 sw ra,60(sp)
800000dc: 02812c23 sw s0,56(sp)
800000e0: 04010413 addi s0,sp,64
800000e4: fca42623 sw a0,-52(s0)
800000e8: fcb42423 sw a1,-56(s0)
800000ec: 450000ef jal ra,8000053c <get_1st_arg>
800000f0: fea42623 sw a0,-20(s0)
800000f4: 428000ef jal ra,8000051c <get_2nd_arg>
800000f4: 470000ef jal ra,80000564 <get_2nd_arg>
800000f8: fea42423 sw a0,-24(s0)
800000fc: 448000ef jal ra,80000544 <get_3rd_arg>
800000fc: 490000ef jal ra,8000058c <get_3rd_arg>
80000100: fea42223 sw a0,-28(s0)
80000104: fd842703 lw a4,-40(s0)
80000108: fdc42783 lw a5,-36(s0)
8000010c: 00f707b3 add a5,a4,a5
80000110: fef42023 sw a5,-32(s0)
80000114: fe042783 lw a5,-32(s0)
80000118: 00279793 slli a5,a5,0x2
8000011c: fec42703 lw a4,-20(s0)
80000120: 00f707b3 add a5,a4,a5
80000124: 0007a683 lw a3,0(a5) # 81000000 <y+0xfffff9f4>
80000128: fe042783 lw a5,-32(s0)
8000012c: 00279793 slli a5,a5,0x2
80000130: fe842703 lw a4,-24(s0)
80000134: 00f707b3 add a5,a4,a5
80000138: 0007a703 lw a4,0(a5)
8000013c: fe042783 lw a5,-32(s0)
80000140: 00279793 slli a5,a5,0x2
80000144: fe442603 lw a2,-28(s0)
80000148: 00f607b3 add a5,a2,a5
8000014c: 00e68733 add a4,a3,a4
80000150: 00e7a023 sw a4,0(a5)
80000154: fd842783 lw a5,-40(s0)
80000158: 00178713 addi a4,a5,1
8000015c: 00070793 mv a5,a4
80000160: 00179793 slli a5,a5,0x1
80000164: 00e787b3 add a5,a5,a4
80000168: 00379793 slli a5,a5,0x3
8000016c: 00e787b3 add a5,a5,a4
80000170: 00279793 slli a5,a5,0x2
80000174: 00078513 mv a0,a5
80000178: 280000ef jal ra,800003f8 <sleep>
8000017c: 00000013 nop
80000180: 02c12083 lw ra,44(sp)
80000184: 02812403 lw s0,40(sp)
80000188: 03010113 addi sp,sp,48
8000018c: 00008067 ret
80000104: fc842703 lw a4,-56(s0)
80000108: 00070793 mv a5,a4
8000010c: 00379793 slli a5,a5,0x3
80000110: 40e787b3 sub a5,a5,a4
80000114: fcc42703 lw a4,-52(s0)
80000118: 00f707b3 add a5,a4,a5
8000011c: fef42023 sw a5,-32(s0)
80000120: fe042783 lw a5,-32(s0)
80000124: 00a7b793 sltiu a5,a5,10
80000128: 0017c793 xori a5,a5,1
8000012c: fcf40fa3 sb a5,-33(s0)
80000130: fdf44783 lbu a5,-33(s0)
80000134: 00078f13 mv t5,a5
80000138: 800007b7 lui a5,0x80000
8000013c: 19478f93 addi t6,a5,404 # 80000194 <y+0xfefffb88>
80000140: 000f206b 0xf206b
80000144: 01ff707b 0x1ff707b
80000148: fe042783 lw a5,-32(s0)
8000014c: 00279793 slli a5,a5,0x2
80000150: fec42703 lw a4,-20(s0)
80000154: 00f707b3 add a5,a4,a5
80000158: 0007a683 lw a3,0(a5)
8000015c: fe042783 lw a5,-32(s0)
80000160: 00279793 slli a5,a5,0x2
80000164: fe842703 lw a4,-24(s0)
80000168: 00f707b3 add a5,a4,a5
8000016c: 0007a703 lw a4,0(a5)
80000170: fe042783 lw a5,-32(s0)
80000174: 00279793 slli a5,a5,0x2
80000178: fe442603 lw a2,-28(s0)
8000017c: 00f607b3 add a5,a2,a5
80000180: 00e68733 add a4,a3,a4
80000184: 00e7a023 sw a4,0(a5)
80000188: 800007b7 lui a5,0x80000
8000018c: 19878e13 addi t3,a5,408 # 80000198 <y+0xfefffb8c>
80000190: 000e0067 jr t3
80000194: 00000013 nop
80000198: 0000306b 0x306b
8000019c: fc842783 lw a5,-56(s0)
800001a0: 00178713 addi a4,a5,1
800001a4: 00070793 mv a5,a4
800001a8: 00179793 slli a5,a5,0x1
800001ac: 00e787b3 add a5,a5,a4
800001b0: 00379793 slli a5,a5,0x3
800001b4: 00e787b3 add a5,a5,a4
800001b8: 00279793 slli a5,a5,0x2
800001bc: 00078513 mv a0,a5
800001c0: 280000ef jal ra,80000440 <sleep>
800001c4: 00000013 nop
800001c8: 03c12083 lw ra,60(sp)
800001cc: 03812403 lw s0,56(sp)
800001d0: 04010113 addi sp,sp,64
800001d4: 00008067 ret
80000190 <queue_initialize>:
80000190: 01000297 auipc t0,0x1000
80000194: eb028293 addi t0,t0,-336 # 81000040 <q>
80000198: 00000313 li t1,0
8000019c: 00700393 li t2,7
800001a0: 0062a023 sw t1,0(t0)
800001a4: 0062a223 sw t1,4(t0)
800001a8: 0062a423 sw t1,8(t0)
800001ac: 0072a623 sw t2,12(t0)
800001b0: 0062a823 sw t1,16(t0)
800001b4: 00008067 ret
800001d8 <queue_initialize>:
800001d8: 01000297 auipc t0,0x1000
800001dc: e6828293 addi t0,t0,-408 # 81000040 <q>
800001e0: 00000313 li t1,0
800001e4: 00700393 li t2,7
800001e8: 0062a023 sw t1,0(t0)
800001ec: 0062a223 sw t1,4(t0)
800001f0: 0062a423 sw t1,8(t0)
800001f4: 0072a623 sw t2,12(t0)
800001f8: 0062a823 sw t1,16(t0)
800001fc: 00008067 ret
800001b8 <queue_enqueue>:
800001b8: 01000297 auipc t0,0x1000
800001bc: e8828293 addi t0,t0,-376 # 81000040 <q>
800001c0: 0082a303 lw t1,8(t0)
800001c4: 00130313 addi t1,t1,1
800001c8: 0062a423 sw t1,8(t0)
800001cc: 01428313 addi t1,t0,20
800001d0: 0042ae83 lw t4,4(t0)
800001d4: 005e9393 slli t2,t4,0x5
800001d8: 00730333 add t1,t1,t2
800001dc: 00052e03 lw t3,0(a0)
800001e0: 01c32023 sw t3,0(t1)
800001e4: 00452e03 lw t3,4(a0)
800001e8: 01c32223 sw t3,4(t1)
800001ec: 00852e03 lw t3,8(a0)
800001f0: 01c32423 sw t3,8(t1)
800001f4: 00c52e03 lw t3,12(a0)
800001f8: 01c32623 sw t3,12(t1)
800001fc: 01052e03 lw t3,16(a0)
80000200: 01c32823 sw t3,16(t1)
80000204: 01452e03 lw t3,20(a0)
80000208: 01c32a23 sw t3,20(t1)
8000020c: 01852e03 lw t3,24(a0)
80000210: 01c32c23 sw t3,24(t1)
80000214: 001e8e93 addi t4,t4,1
80000218: 03200f13 li t5,50
8000021c: 01ee9463 bne t4,t5,80000224 <ec>
80000220: 00000e93 li t4,0
80000200 <queue_enqueue>:
80000200: 01000297 auipc t0,0x1000
80000204: e4028293 addi t0,t0,-448 # 81000040 <q>
80000208: 0082a303 lw t1,8(t0)
8000020c: 00130313 addi t1,t1,1
80000210: 0062a423 sw t1,8(t0)
80000214: 01428313 addi t1,t0,20
80000218: 0042ae83 lw t4,4(t0)
8000021c: 005e9393 slli t2,t4,0x5
80000220: 00730333 add t1,t1,t2
80000224: 00052e03 lw t3,0(a0)
80000228: 01c32023 sw t3,0(t1)
8000022c: 00452e03 lw t3,4(a0)
80000230: 01c32223 sw t3,4(t1)
80000234: 00852e03 lw t3,8(a0)
80000238: 01c32423 sw t3,8(t1)
8000023c: 00c52e03 lw t3,12(a0)
80000240: 01c32623 sw t3,12(t1)
80000244: 01052e03 lw t3,16(a0)
80000248: 01c32823 sw t3,16(t1)
8000024c: 01452e03 lw t3,20(a0)
80000250: 01c32a23 sw t3,20(t1)
80000254: 01852e03 lw t3,24(a0)
80000258: 01c32c23 sw t3,24(t1)
8000025c: 001e8e93 addi t4,t4,1
80000260: 03200f13 li t5,50
80000264: 01ee9463 bne t4,t5,8000026c <ec>
80000268: 00000e93 li t4,0
80000224 <ec>:
80000224: 01d2a223 sw t4,4(t0)
80000228: 00008067 ret
8000026c <ec>:
8000026c: 01d2a223 sw t4,4(t0)
80000270: 00008067 ret
8000022c <queue_dequeue>:
8000022c: 01000297 auipc t0,0x1000
80000230: e1428293 addi t0,t0,-492 # 81000040 <q>
80000234: 0082a303 lw t1,8(t0)
80000238: fff30313 addi t1,t1,-1
8000023c: 0062a423 sw t1,8(t0)
80000240: 01428313 addi t1,t0,20
80000244: 0002ae83 lw t4,0(t0)
80000248: 03200f93 li t6,50
8000024c: 000e8f13 mv t5,t4
80000250: 001f0f13 addi t5,t5,1
80000254: 01ff1463 bne t5,t6,8000025c <dc>
80000258: 00000f13 li t5,0
80000274 <queue_dequeue>:
80000274: 01000297 auipc t0,0x1000
80000278: dcc28293 addi t0,t0,-564 # 81000040 <q>
8000027c: 0082a303 lw t1,8(t0)
80000280: fff30313 addi t1,t1,-1
80000284: 0062a423 sw t1,8(t0)
80000288: 01428313 addi t1,t0,20
8000028c: 0002ae83 lw t4,0(t0)
80000290: 03200f93 li t6,50
80000294: 000e8f13 mv t5,t4
80000298: 001f0f13 addi t5,t5,1
8000029c: 01ff1463 bne t5,t6,800002a4 <dc>
800002a0: 00000f13 li t5,0
8000025c <dc>:
8000025c: 01e2a023 sw t5,0(t0)
80000260: 005e9393 slli t2,t4,0x5
80000264: 00730333 add t1,t1,t2
80000268: 00032e03 lw t3,0(t1)
8000026c: 01c52023 sw t3,0(a0)
80000270: 00432e03 lw t3,4(t1)
80000274: 01c52223 sw t3,4(a0)
80000278: 00832e03 lw t3,8(t1)
8000027c: 01c52423 sw t3,8(a0)
80000280: 00c32e03 lw t3,12(t1)
80000284: 01c52623 sw t3,12(a0)
80000288: 01032e03 lw t3,16(t1)
8000028c: 01c52823 sw t3,16(a0)
80000290: 01432e03 lw t3,20(t1)
80000294: 01c52a23 sw t3,20(a0)
80000298: 01832e03 lw t3,24(t1)
8000029c: 01c52c23 sw t3,24(a0)
800002a0: 00008067 ret
800002a4 <dc>:
800002a4: 01e2a023 sw t5,0(t0)
800002a8: 005e9393 slli t2,t4,0x5
800002ac: 00730333 add t1,t1,t2
800002b0: 00032e03 lw t3,0(t1)
800002b4: 01c52023 sw t3,0(a0)
800002b8: 00432e03 lw t3,4(t1)
800002bc: 01c52223 sw t3,4(a0)
800002c0: 00832e03 lw t3,8(t1)
800002c4: 01c52423 sw t3,8(a0)
800002c8: 00c32e03 lw t3,12(t1)
800002cc: 01c52623 sw t3,12(a0)
800002d0: 01032e03 lw t3,16(t1)
800002d4: 01c52823 sw t3,16(a0)
800002d8: 01432e03 lw t3,20(t1)
800002dc: 01c52a23 sw t3,20(a0)
800002e0: 01832e03 lw t3,24(t1)
800002e4: 01c52c23 sw t3,24(a0)
800002e8: 00008067 ret
800002a4 <queue_isFull>:
800002a4: 01000297 auipc t0,0x1000
800002a8: d9c28293 addi t0,t0,-612 # 81000040 <q>
800002ac: 0082a303 lw t1,8(t0)
800002b0: 00000513 li a0,0
800002b4: 03200e13 li t3,50
800002b8: 006e1463 bne t3,t1,800002c0 <qf>
800002bc: 00150513 addi a0,a0,1
800002ec <queue_isFull>:
800002ec: 01000297 auipc t0,0x1000
800002f0: d5428293 addi t0,t0,-684 # 81000040 <q>
800002f4: 0082a303 lw t1,8(t0)
800002f8: 00000513 li a0,0
800002fc: 03200e13 li t3,50
80000300: 006e1463 bne t3,t1,80000308 <qf>
80000304: 00150513 addi a0,a0,1
800002c0 <qf>:
800002c0: 00008067 ret
80000308 <qf>:
80000308: 00008067 ret
800002c4 <queue_isEmpty>:
800002c4: 01000297 auipc t0,0x1000
800002c8: d7c28293 addi t0,t0,-644 # 81000040 <q>
800002cc: 0082a303 lw t1,8(t0)
800002d0: 00000513 li a0,0
800002d4: 00000e13 li t3,0
800002d8: 006e1463 bne t3,t1,800002e0 <qe>
800002dc: 00150513 addi a0,a0,1
8000030c <queue_isEmpty>:
8000030c: 01000297 auipc t0,0x1000
80000310: d3428293 addi t0,t0,-716 # 81000040 <q>
80000314: 0082a303 lw t1,8(t0)
80000318: 00000513 li a0,0
8000031c: 00000e13 li t3,0
80000320: 006e1463 bne t3,t1,80000328 <qe>
80000324: 00150513 addi a0,a0,1
800002e0 <qe>:
800002e0: 00008067 ret
80000328 <qe>:
80000328: 00008067 ret
800002e4 <queue_availableWarps>:
800002e4: 01000297 auipc t0,0x1000
800002e8: d5c28293 addi t0,t0,-676 # 81000040 <q>
800002ec: 00c2a303 lw t1,12(t0)
800002f0: 0102a383 lw t2,16(t0)
800002f4: 0063b533 sltu a0,t2,t1
800002f8: 00008067 ret
8000032c <queue_availableWarps>:
8000032c: 01000297 auipc t0,0x1000
80000330: d1428293 addi t0,t0,-748 # 81000040 <q>
80000334: 00c2a303 lw t1,12(t0)
80000338: 0102a383 lw t2,16(t0)
8000033c: 0063b533 sltu a0,t2,t1
80000340: 00008067 ret
800002fc <reschedule_warps>:
800002fc: fd010113 addi sp,sp,-48
80000300: 02112623 sw ra,44(sp)
80000304: 02812423 sw s0,40(sp)
80000308: 03010413 addi s0,sp,48
8000030c: fb9ff0ef jal ra,800002c4 <queue_isEmpty>
80000310: 00050793 mv a5,a0
80000314: 00078463 beqz a5,8000031c <reschedule_warps+0x20>
80000318: 00000073 ecall
8000031c: fd440793 addi a5,s0,-44
80000320: 00078513 mv a0,a5
80000324: f09ff0ef jal ra,8000022c <queue_dequeue>
80000328: fdc42783 lw a5,-36(s0)
8000032c: 00078113 mv sp,a5
80000330: fd842503 lw a0,-40(s0)
80000334: fd442583 lw a1,-44(s0)
80000338: fe042603 lw a2,-32(s0)
8000033c: fe442683 lw a3,-28(s0)
80000340: fe842703 lw a4,-24(s0)
80000344: fec42783 lw a5,-20(s0)
80000348: cc9ff0ef jal ra,80000010 <createThreads>
8000034c: 00000073 ecall
80000350: 00000013 nop
80000354: 02c12083 lw ra,44(sp)
80000358: 02812403 lw s0,40(sp)
8000035c: 03010113 addi sp,sp,48
80000360: 00008067 ret
80000344 <reschedule_warps>:
80000344: fd010113 addi sp,sp,-48
80000348: 02112623 sw ra,44(sp)
8000034c: 02812423 sw s0,40(sp)
80000350: 03010413 addi s0,sp,48
80000354: fb9ff0ef jal ra,8000030c <queue_isEmpty>
80000358: 00050793 mv a5,a0
8000035c: 00078463 beqz a5,80000364 <reschedule_warps+0x20>
80000360: 00000073 ecall
80000364: fd440793 addi a5,s0,-44
80000368: 00078513 mv a0,a5
8000036c: f09ff0ef jal ra,80000274 <queue_dequeue>
80000370: fdc42783 lw a5,-36(s0)
80000374: 00078113 mv sp,a5
80000378: fd842503 lw a0,-40(s0)
8000037c: fd442583 lw a1,-44(s0)
80000380: fe042603 lw a2,-32(s0)
80000384: fe442683 lw a3,-28(s0)
80000388: fe842703 lw a4,-24(s0)
8000038c: fec42783 lw a5,-20(s0)
80000390: c81ff0ef jal ra,80000010 <createThreads>
80000394: 00000073 ecall
80000398: 00000013 nop
8000039c: 02c12083 lw ra,44(sp)
800003a0: 02812403 lw s0,40(sp)
800003a4: 03010113 addi sp,sp,48
800003a8: 00008067 ret
80000364 <schedule_warps>:
80000364: fd010113 addi sp,sp,-48
80000368: 02112623 sw ra,44(sp)
8000036c: 02812423 sw s0,40(sp)
80000370: 03010413 addi s0,sp,48
80000374: 00010993 mv s3,sp
80000378: 0500006f j 800003c8 <schedule_warps+0x64>
8000037c: 810007b7 lui a5,0x81000
80000380: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
80000384: 0107a783 lw a5,16(a5)
80000388: 00178713 addi a4,a5,1
8000038c: 810007b7 lui a5,0x81000
80000390: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
80000394: 00e7a823 sw a4,16(a5)
80000398: fd440793 addi a5,s0,-44
8000039c: 00078513 mv a0,a5
800003a0: e8dff0ef jal ra,8000022c <queue_dequeue>
800003a4: fdc42783 lw a5,-36(s0)
800003a8: 00078113 mv sp,a5
800003ac: fd842503 lw a0,-40(s0)
800003b0: fd442583 lw a1,-44(s0)
800003b4: fe042603 lw a2,-32(s0)
800003b8: fe442683 lw a3,-28(s0)
800003bc: fe842703 lw a4,-24(s0)
800003c0: fec42783 lw a5,-20(s0)
800003c4: c9dff0ef jal ra,80000060 <wspawn>
800003c8: efdff0ef jal ra,800002c4 <queue_isEmpty>
800003cc: 00050793 mv a5,a0
800003d0: 00079863 bnez a5,800003e0 <schedule_warps+0x7c>
800003d4: f11ff0ef jal ra,800002e4 <queue_availableWarps>
800003d8: 00050793 mv a5,a0
800003dc: fa0790e3 bnez a5,8000037c <schedule_warps+0x18>
800003e0: 00098113 mv sp,s3
800003e4: 00000013 nop
800003e8: 02c12083 lw ra,44(sp)
800003ec: 02812403 lw s0,40(sp)
800003f0: 03010113 addi sp,sp,48
800003f4: 00008067 ret
800003ac <schedule_warps>:
800003ac: fd010113 addi sp,sp,-48
800003b0: 02112623 sw ra,44(sp)
800003b4: 02812423 sw s0,40(sp)
800003b8: 03010413 addi s0,sp,48
800003bc: 00010993 mv s3,sp
800003c0: 0500006f j 80000410 <schedule_warps+0x64>
800003c4: 810007b7 lui a5,0x81000
800003c8: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
800003cc: 0107a783 lw a5,16(a5)
800003d0: 00178713 addi a4,a5,1
800003d4: 810007b7 lui a5,0x81000
800003d8: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
800003dc: 00e7a823 sw a4,16(a5)
800003e0: fd440793 addi a5,s0,-44
800003e4: 00078513 mv a0,a5
800003e8: e8dff0ef jal ra,80000274 <queue_dequeue>
800003ec: fdc42783 lw a5,-36(s0)
800003f0: 00078113 mv sp,a5
800003f4: fd842503 lw a0,-40(s0)
800003f8: fd442583 lw a1,-44(s0)
800003fc: fe042603 lw a2,-32(s0)
80000400: fe442683 lw a3,-28(s0)
80000404: fe842703 lw a4,-24(s0)
80000408: fec42783 lw a5,-20(s0)
8000040c: c55ff0ef jal ra,80000060 <wspawn>
80000410: efdff0ef jal ra,8000030c <queue_isEmpty>
80000414: 00050793 mv a5,a0
80000418: 00079863 bnez a5,80000428 <schedule_warps+0x7c>
8000041c: f11ff0ef jal ra,8000032c <queue_availableWarps>
80000420: 00050793 mv a5,a0
80000424: fa0790e3 bnez a5,800003c4 <schedule_warps+0x18>
80000428: 00098113 mv sp,s3
8000042c: 00000013 nop
80000430: 02c12083 lw ra,44(sp)
80000434: 02812403 lw s0,40(sp)
80000438: 03010113 addi sp,sp,48
8000043c: 00008067 ret
800003f8 <sleep>:
800003f8: fd010113 addi sp,sp,-48
800003fc: 02812623 sw s0,44(sp)
80000400: 03010413 addi s0,sp,48
80000404: fca42e23 sw a0,-36(s0)
80000408: fe042623 sw zero,-20(s0)
8000040c: 0100006f j 8000041c <sleep+0x24>
80000410: fec42783 lw a5,-20(s0)
80000414: 00178793 addi a5,a5,1
80000418: fef42623 sw a5,-20(s0)
8000041c: fec42703 lw a4,-20(s0)
80000420: fdc42783 lw a5,-36(s0)
80000424: fef746e3 blt a4,a5,80000410 <sleep+0x18>
80000428: 00000013 nop
8000042c: 02c12403 lw s0,44(sp)
80000430: 03010113 addi sp,sp,48
80000434: 00008067 ret
80000440 <sleep>:
80000440: fd010113 addi sp,sp,-48
80000444: 02812623 sw s0,44(sp)
80000448: 03010413 addi s0,sp,48
8000044c: fca42e23 sw a0,-36(s0)
80000450: fe042623 sw zero,-20(s0)
80000454: 0100006f j 80000464 <sleep+0x24>
80000458: fec42783 lw a5,-20(s0)
8000045c: 00178793 addi a5,a5,1
80000460: fef42623 sw a5,-20(s0)
80000464: fec42703 lw a4,-20(s0)
80000468: fdc42783 lw a5,-36(s0)
8000046c: fef746e3 blt a4,a5,80000458 <sleep+0x18>
80000470: 00000013 nop
80000474: 02c12403 lw s0,44(sp)
80000478: 03010113 addi sp,sp,48
8000047c: 00008067 ret
80000438 <createWarps>:
80000438: fb010113 addi sp,sp,-80
8000043c: 04112623 sw ra,76(sp)
80000440: 04812423 sw s0,72(sp)
80000444: 05010413 addi s0,sp,80
80000448: fca42623 sw a0,-52(s0)
8000044c: fcb42423 sw a1,-56(s0)
80000450: fcc42223 sw a2,-60(s0)
80000454: fcd42023 sw a3,-64(s0)
80000458: fae42e23 sw a4,-68(s0)
8000045c: faf42c23 sw a5,-72(s0)
80000460: 00010913 mv s2,sp
80000464: fe042623 sw zero,-20(s0)
80000468: 05c0006f j 800004c4 <createWarps+0x8c>
8000046c: ffff09b7 lui s3,0xffff0
80000470: 01310133 add sp,sp,s3
80000474: fec42783 lw a5,-20(s0)
80000478: fcf42823 sw a5,-48(s0)
8000047c: fc842783 lw a5,-56(s0)
80000480: fcf42a23 sw a5,-44(s0)
80000484: 00010793 mv a5,sp
80000488: fcf42c23 sw a5,-40(s0)
8000048c: fc442783 lw a5,-60(s0)
80000490: fcf42e23 sw a5,-36(s0)
80000494: fc042783 lw a5,-64(s0)
80000498: fef42023 sw a5,-32(s0)
8000049c: fbc42783 lw a5,-68(s0)
800004a0: fef42223 sw a5,-28(s0)
800004a4: fb842783 lw a5,-72(s0)
800004a8: fef42423 sw a5,-24(s0)
800004ac: fd040793 addi a5,s0,-48
800004b0: 00078513 mv a0,a5
800004b4: d05ff0ef jal ra,800001b8 <queue_enqueue>
800004b8: fec42783 lw a5,-20(s0)
800004bc: 00178793 addi a5,a5,1
800004c0: fef42623 sw a5,-20(s0)
800004c4: fec42703 lw a4,-20(s0)
800004c8: fcc42783 lw a5,-52(s0)
800004cc: faf760e3 bltu a4,a5,8000046c <createWarps+0x34>
800004d0: 00090113 mv sp,s2
800004d4: e91ff0ef jal ra,80000364 <schedule_warps>
800004d8: 06400513 li a0,100
800004dc: f1dff0ef jal ra,800003f8 <sleep>
800004e0: 00000013 nop
800004e4: 04c12083 lw ra,76(sp)
800004e8: 04812403 lw s0,72(sp)
800004ec: 05010113 addi sp,sp,80
800004f0: 00008067 ret
80000480 <createWarps>:
80000480: fb010113 addi sp,sp,-80
80000484: 04112623 sw ra,76(sp)
80000488: 04812423 sw s0,72(sp)
8000048c: 05010413 addi s0,sp,80
80000490: fca42623 sw a0,-52(s0)
80000494: fcb42423 sw a1,-56(s0)
80000498: fcc42223 sw a2,-60(s0)
8000049c: fcd42023 sw a3,-64(s0)
800004a0: fae42e23 sw a4,-68(s0)
800004a4: faf42c23 sw a5,-72(s0)
800004a8: 00010913 mv s2,sp
800004ac: fe042623 sw zero,-20(s0)
800004b0: 05c0006f j 8000050c <createWarps+0x8c>
800004b4: ffff09b7 lui s3,0xffff0
800004b8: 01310133 add sp,sp,s3
800004bc: fec42783 lw a5,-20(s0)
800004c0: fcf42823 sw a5,-48(s0)
800004c4: fc842783 lw a5,-56(s0)
800004c8: fcf42a23 sw a5,-44(s0)
800004cc: 00010793 mv a5,sp
800004d0: fcf42c23 sw a5,-40(s0)
800004d4: fc442783 lw a5,-60(s0)
800004d8: fcf42e23 sw a5,-36(s0)
800004dc: fc042783 lw a5,-64(s0)
800004e0: fef42023 sw a5,-32(s0)
800004e4: fbc42783 lw a5,-68(s0)
800004e8: fef42223 sw a5,-28(s0)
800004ec: fb842783 lw a5,-72(s0)
800004f0: fef42423 sw a5,-24(s0)
800004f4: fd040793 addi a5,s0,-48
800004f8: 00078513 mv a0,a5
800004fc: d05ff0ef jal ra,80000200 <queue_enqueue>
80000500: fec42783 lw a5,-20(s0)
80000504: 00178793 addi a5,a5,1
80000508: fef42623 sw a5,-20(s0)
8000050c: fec42703 lw a4,-20(s0)
80000510: fcc42783 lw a5,-52(s0)
80000514: faf760e3 bltu a4,a5,800004b4 <createWarps+0x34>
80000518: 00090113 mv sp,s2
8000051c: e91ff0ef jal ra,800003ac <schedule_warps>
80000520: 06400513 li a0,100
80000524: f1dff0ef jal ra,80000440 <sleep>
80000528: 00000013 nop
8000052c: 04c12083 lw ra,76(sp)
80000530: 04812403 lw s0,72(sp)
80000534: 05010113 addi sp,sp,80
80000538: 00008067 ret
800004f4 <get_1st_arg>:
800004f4: ff010113 addi sp,sp,-16
800004f8: 00812623 sw s0,12(sp)
800004fc: 01712423 sw s7,8(sp)
80000500: 01010413 addi s0,sp,16
80000504: 000b8793 mv a5,s7
80000508: 00078513 mv a0,a5
8000050c: 00c12403 lw s0,12(sp)
80000510: 00812b83 lw s7,8(sp)
80000514: 01010113 addi sp,sp,16
80000518: 00008067 ret
8000053c <get_1st_arg>:
8000053c: ff010113 addi sp,sp,-16
80000540: 00812623 sw s0,12(sp)
80000544: 01712423 sw s7,8(sp)
80000548: 01010413 addi s0,sp,16
8000054c: 000b8793 mv a5,s7
80000550: 00078513 mv a0,a5
80000554: 00c12403 lw s0,12(sp)
80000558: 00812b83 lw s7,8(sp)
8000055c: 01010113 addi sp,sp,16
80000560: 00008067 ret
8000051c <get_2nd_arg>:
8000051c: ff010113 addi sp,sp,-16
80000520: 00812623 sw s0,12(sp)
80000524: 01812423 sw s8,8(sp)
80000528: 01010413 addi s0,sp,16
8000052c: 000c0793 mv a5,s8
80000530: 00078513 mv a0,a5
80000534: 00c12403 lw s0,12(sp)
80000538: 00812c03 lw s8,8(sp)
8000053c: 01010113 addi sp,sp,16
80000540: 00008067 ret
80000564 <get_2nd_arg>:
80000564: ff010113 addi sp,sp,-16
80000568: 00812623 sw s0,12(sp)
8000056c: 01812423 sw s8,8(sp)
80000570: 01010413 addi s0,sp,16
80000574: 000c0793 mv a5,s8
80000578: 00078513 mv a0,a5
8000057c: 00c12403 lw s0,12(sp)
80000580: 00812c03 lw s8,8(sp)
80000584: 01010113 addi sp,sp,16
80000588: 00008067 ret
80000544 <get_3rd_arg>:
80000544: ff010113 addi sp,sp,-16
80000548: 00812623 sw s0,12(sp)
8000054c: 01912423 sw s9,8(sp)
80000550: 01010413 addi s0,sp,16
80000554: 000c8793 mv a5,s9
80000558: 00078513 mv a0,a5
8000055c: 00c12403 lw s0,12(sp)
80000560: 00812c83 lw s9,8(sp)
80000564: 01010113 addi sp,sp,16
80000568: 00008067 ret
8000058c <get_3rd_arg>:
8000058c: ff010113 addi sp,sp,-16
80000590: 00812623 sw s0,12(sp)
80000594: 01912423 sw s9,8(sp)
80000598: 01010413 addi s0,sp,16
8000059c: 000c8793 mv a5,s9
800005a0: 00078513 mv a0,a5
800005a4: 00c12403 lw s0,12(sp)
800005a8: 00812c83 lw s9,8(sp)
800005ac: 01010113 addi sp,sp,16
800005b0: 00008067 ret
Disassembly of section .bss:

Binary file not shown.

View file

@ -1,91 +1,96 @@
:0200000480007A
:1000000037F1FF7FEF00C018EF008006730000009B
:1000000037F1FF7FEF00401DEF0080067300000016
:10001000938B0600130C0700938C0700130F01004D
:100020009303050013051000635C75001301018044
:10003000130305006B500300130515006FF0DFFE7E
:1000400013010F0013050000930F0600938D0300AA
:10005000EBE0BF01170500001305852A6B40050082
:10005000EBE0BF01170500001305052F6B400500FD
:1000600017030000130303FB6B000300678000000D
:10007000130101FF23261100232481001304010131
:10008000B707008193870700370700811307C76010
:10009000B70600819386C65C370600801306460DBE
:1000A0009305100013050001EF0000391300000054
:1000B000EF00402193070500E38C07FE9307000043
:1000A0009305700013053000EF00803D1300000041
:1000B000EF00C02593070500E38C07FE93070000BF
:1000C000138507008320C10003248100130101016F
:1000D00067800000130101FD232611022324810201
:1000E00013040103232EA4FC232CB4FCEF00804056
:1000F0002326A4FEEF0080422324A4FEEF008044C8
:100100002322A4FE032784FD8327C4FDB307F70041
:100110002320F4FE832704FE939727000327C4FEC1
:10012000B307F70083A60700832704FE93972700F1
:10013000032784FEB307F70003A70700832704FE05
:1001400093972700032644FEB307F6003387E600A3
:1001500023A0E700832784FD138717009307070078
:1001600093971700B387E70093973700B387E700AB
:100170009397270013850700EF0000281300000065
:100180008320C10203248102130101036780000060
:1001900097020001938202EB1303000093037000A7
:1001A00023A0620023A2620023A4620023A672009F
:1001B00023A862006780000097020001938282E812
:1001C00003A382001303130023A4620013834201DC
:1001D00083AE420093935E0033037300032E050049
:1001E0002320C301032E45002322C301032E8500D3
:1001F0002324C301032EC5002326C301032E0501BA
:100200002328C301032E4501232AC301032E8501A0
:10021000232CC301938E1E00130F20036394EE0161
:10022000930E000023A2D201678000009702000114
:10023000938242E103A382001303F3FF23A462002D
:100240001383420183AE0200930F2003138F0E002D
:10025000130F1F006314FF01130F000023A0E2011E
:1002600093935E0033037300032E03002320C50124
:10027000032E43002322C501032E83002324C5013E
:10028000032EC3002326C501032E03012328C50125
:10029000032E4301232AC501032E8301232CC5010C
:1002A00067800000970200019382C2D903A38200F5
:1002B00013050000130E200363146E0013051500D0
:1002C00067800000970200019382C2D703A38200D7
:1002D00013050000130E000063146E0013051500D3
:1002E00067800000970200019382C2D503A3C20079
:1002F00083A3020133B5630067800000130101FD91
:10030000232611022324810213040103EFF09FFB33
:10031000930705006384070073000000930744FD02
:1003200013850700EFF09FF08327C4FD13810700BA
:10033000032584FD832544FD032604FE832644FE15
:10034000032784FE8327C4FEEFF09FCC73000000D8
:10035000130000008320C102032481021301010362
:1003600067800000130101FD23261102232481026E
:1003700013040103930901006F000005B707008112
:100380009387070483A7070113871700B707008126
:100390009387070423A8E700930744FD138507000C
:1003A000EFF0DFE88327C4FD13810700032584FDF8
:1003B000832544FD032604FE832644FE032784FE92
:1003C0008327C4FEEFF0DFC9EFF0DFEF93070500EE
:1003D00063980700EFF01FF193070500E39007FA19
:1003E00013810900130000008320C102032481024D
:1003F0001301010367800000130101FD2326810220
:1004000013040103232EA4FC232604FE6F00000125
:100410008327C4FE938717002326F4FE0327C4FE18
:100420008327C4FDE346F7FE130000000324C10246
:100430001301010367800000130101FB232611044F
:1004400023248104130401052326A4FC2324B4FCE3
:100450002322C4FC2320D4FC232EE4FA232CF4FA18
:1004600013090100232604FE6F00C005B709FFFF32
:10047000330131018327C4FE2328F4FC832784FC45
:10048000232AF4FC93070100232CF4FC832744FC6B
:10049000232EF4FC832704FC2320F4FE8327C4FBD3
:1004A0002322F4FE832784FB2324F4FE930704FD18
:1004B00013850700EFF05FD08327C4FE93871700F2
:1004C0002326F4FE0327C4FE8327C4FCE360F7FA67
:1004D00013010900EFF01FE913054006EFF0DFF10B
:1004E000130000008320C1040324810413010105CB
:1004F00067800000130101FF23268100232471017E
:100500001304010193870B00138507000324C10026
:10051000832B81001301010167800000130101FF9B
:1005200023268100232481011304010193070C0079
:10053000138507000324C100032C8100130101016E
:1005400067800000130101FF23268100232491010D
:100550001304010193870C00138507000324C100D5
:0C056000832C8100130101016780000062
:1000D00067800000130101FC232E1102232C8102F2
:1000E000130401042326A4FC2324B4FCEF000045E0
:1000F0002326A4FEEF0000472324A4FEEF000049BE
:100100002322A4FE032784FC93070700939737005C
:10011000B387E7400327C4FCB307F7002320F4FEAE
:10012000832704FE93B7A70093C71700A30FF4FC1F
:100130008347F4FD138F0700B7070080938F47199B
:100140006B200F007B70FF01832704FE939727002D
:100150000327C4FEB307F70083A60700832704FE26
:1001600093972700032784FEB307F70003A7070030
:10017000832704FE93972700032644FEB307F60067
:100180003387E60023A0E700B7070080138E8719A6
:1001900067000E00130000006B300000832784FC12
:1001A000138717009307070093971700B387E7009B
:1001B00093973700B387E7009397270013850700CD
:1001C000EF000028130000008320C10303248103F3
:1001D000130101046780000097020001938282E608
:1001E000130300009303700023A0620023A26200A7
:1001F00023A4620023A6720023A862006780000087
:1002000097020001938202E403A382001303130008
:1002100023A462001383420183AE420093935E00E5
:1002200033037300032E05002320C301032E450072
:100230002322C301032E85002324C301032EC500FE
:100240002326C301032E05012328C301032E4501E4
:10025000232AC301032E8501232CC301938E1E0084
:10026000130F20036394EE01930E000023A2D2012A
:1002700067800000970200019382C2DC03A3820022
:100280001303F3FF23A462001383420183AE020031
:10029000930F2003138F0E00130F1F006314FF0131
:1002A000130F000023A0E20193935E003303730059
:1002B000032E03002320C501032E43002322C50182
:1002C000032E83002324C501032EC3002326C5016A
:1002D000032E03012328C501032E4301232AC50150
:1002E000032E8301232CC5016780000097020001C3
:1002F000938242D503A3820013050000130E20034E
:1003000063146E001305150067800000970200015A
:10031000938242D303A3820013050000130E000052
:1003200063146E001305150067800000970200013A
:10033000938242D103A3C20083A3020133B56300B9
:1003400067800000130101FD23261102232481028E
:1003500013040103EFF09FFB93070500638407007C
:1003600073000000930744FD13850700EFF09FF032
:100370008327C4FD13810700032584FD832544FDE5
:10038000032604FE832644FE032784FE8327C4FE3F
:10039000EFF01FC873000000130000008320C102AB
:1003A000032481021301010367800000130101FD92
:1003B000232611022324810213040103930901005F
:1003C0006F000005B70700819387070483A7070123
:1003D00013871700B70700819387070423A8E70056
:1003E000930744FD13850700EFF0DFE88327C4FD82
:1003F00013810700032584FD832544FD032604FEA5
:10040000832644FE032784FE8327C4FEEFF05FC5E6
:10041000EFF0DFEF9307050063980700EFF01FF19F
:1004200093070500E39007FA138109001300000009
:100430008320C102032481021301010367800000AD
:10044000130101FD2326810213040103232EA4FCC2
:10045000232604FE6F0000018327C4FE9387170044
:100460002326F4FE0327C4FE8327C4FDE346F7FEDC
:10047000130000000324C102130101036780000080
:10048000130101FB23261104232481041304010515
:100490002326A4FC2324B4FC2322C4FC2320D4FC64
:1004A000232EE4FA232CF4FA13090100232604FE78
:1004B0006F00C005B709FFFF330131018327C4FE78
:1004C0002328F4FC832784FC232AF4FC93070100EF
:1004D000232CF4FC832744FC232EF4FC832704FC08
:1004E0002320F4FE8327C4FB2322F4FE832784FB0E
:1004F0002324F4FE930704FD13850700EFF05FD07B
:100500008327C4FE938717002326F4FE0327C4FE27
:100510008327C4FCE360F7FA13010900EFF01FE939
:1005200013054006EFF0DFF1130000008320C10443
:10053000032481041301010567800000130101FFFA
:1005400023268100232471011304010193870B00EA
:10055000138507000324C100832B810013010101CF
:1005600067800000130101FF2326810023248101FD
:100570001304010193070C00138507000324C10035
:10058000032C81001301010167800000130101FFAA
:1005900023268100232491011304010193870C0079
:1005A000138507000324C100832C8100130101017E
:0405B0006780000060
:02000004810079
:1005CC000100000001000000060000000000000017
:1005DC000300000001000000010000000200000008

View file

@ -71,19 +71,6 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
schedule_warps();
sleep(100);
// asm __volatile__("addi t5, sp, 0");
// for (unsigned i = 1; i < num_Warps; i++)
// {
// asm __volatile__("addi sp, sp, -2048");
// wspawn(num_threads, i, func, x_ptr, y_ptr, z_ptr);
// }
// asm __volatile__("addi sp, t5, 0");
// createThreads(num_threads, 0, (unsigned) func, x_ptr, y_ptr, z_ptr);
}

View file

@ -10,24 +10,23 @@
// Raw instruction words emitted with .word. Each macro carries its own
// trailing ';' so it can be used as a bare statement.
//
// ECALL is the encoding 0x00000073; JMPRT, SPLIT and JOIN all have 0x6b in
// their low seven bits (the opcode field).
#define ECALL asm __volatile__(".word 0x00000073");
#define JMPRT asm __volatile__(".word 0x5406b");
#define SPLIT asm __volatile__(".word 0xf206b");
// Predicated jump. The low seven bits are 0x7b, so it does not share an
// opcode with the 0x6b instructions above. Read with the standard RISC-V
// R-type field layout, the word names x30 (t5) as rs1 and x31 (t6) as rs2,
// which is why __if pins its operands to those two registers.
#define P_JUMP asm __volatile__(".word 0x1ff707b");
#define JOIN asm __volatile__(".word 0x306b");

// __if(val) ... __else ... __end_if
//
// __if(val) stores !(val) in t5 and the address of the ELSE label in t6,
// then issues SPLIT followed by P_JUMP. The argument is parenthesised so
// that compound conditions such as `a == b` are negated as a whole.
//
// NOTE(review): the macros declare the locals `temp`, `p`, `e`, `w` and the
// labels ELSE / AFTER in the caller's scope, so only one
// __if/__else/__end_if group fits in a function. The sequence also relies
// on the compiler keeping `p` and `e` live in t5/t6 across the .word
// statements; GCC documents register variables only as a way to bind
// operands of extended asm, so confirm this at the optimisation level used.
#define __if(val) bool temp = !(val); \
                  register unsigned p asm("t5") = temp; \
                  register void * e asm("t6") = &&ELSE; \
                  SPLIT; \
                  P_JUMP;

// Ends the "then" part: jumps to AFTER through t3, then opens the ELSE
// label that __if stored in t6.
#define __else register void * w asm("t3") = &&AFTER; \
               asm __volatile__("jr t3"); \
               ELSE: asm __volatile__("nop");

// Closes the construct: defines the AFTER label and issues JOIN.
#define __end_if AFTER: \
                 JOIN;

// Parameter declaration for a kernel entry point taking two unsigned values.
#define FUNC void (func)(unsigned, unsigned)

View file

@ -7,14 +7,14 @@
.type queue_initialize, @function
.global queue_initialize
# queue_initialize: reset the warp queue header stored at symbol q.
# Writes five consecutive words starting at q:
#   q+0  start_i      = 0
#   q+4  end_i        = 0
#   q+8  num_j        = 0
#   q+12 total_warps  = A_WARPS
#   q+16 active_warps = 0
# Uses t0, t1 and t2 as scratch; takes no arguments and returns nothing.
# Each field is written exactly once; total_warps comes from the A_WARPS
# symbol rather than a hard-coded literal.
queue_initialize:
    la t0, q            # t0 = base address of q
    li t1, 0            # t1 = zero, used for every cleared field
    li t2, A_WARPS      # t2 = number of available warps
    sw t1, 0 (t0)       # start_i
    sw t1, 4 (t0)       # end_i
    sw t1, 8 (t0)       # num_j
    sw t2, 12(t0)       # total_warps
    sw t1, 16(t0)       # active_warps
    ret
@ -125,4 +125,3 @@ queue_availableWarps:
lw t2, 16(t0) # t2 = active_warps
sltu a0, t2, t1
ret