mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Warp Scheduling + Control Divergence working and stable
This commit is contained in:
parent
087a39ccf4
commit
af4303a4ca
13 changed files with 298246 additions and 496 deletions
20
src/core.cpp
20
src/core.cpp
|
@ -203,20 +203,22 @@ void Warp::step() {
|
|||
D_RAW(' ' << hex << reg[j][i] << ' ');
|
||||
D_RAW('(' << shadowReg[i] << ')' << endl);
|
||||
}
|
||||
D(3, "Predicate state:");
|
||||
D_RAW(" ");
|
||||
for (unsigned j = 0; j < pred.size(); ++j) {
|
||||
for (unsigned i = 0; i < pred[j].size(); ++i) D_RAW(pred[j][i]);
|
||||
D_RAW(endl);
|
||||
}
|
||||
D_RAW(" (");
|
||||
for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
||||
D_RAW(')' << endl);
|
||||
// D(3, "Predicate state:");
|
||||
// D_RAW(" ");
|
||||
// for (unsigned j = 0; j < pred.size(); ++j) {
|
||||
// for (unsigned i = 0; i < pred[j].size(); ++i) D_RAW(pred[j][i]);
|
||||
// D_RAW(endl);
|
||||
// }
|
||||
// D_RAW(" (");
|
||||
// for (unsigned i = 0; i < shadowPReg.size(); ++i) D_RAW(shadowPReg[i]);
|
||||
// D_RAW(')' << endl);
|
||||
|
||||
D(3, "Thread mask:");
|
||||
D_RAW(" ");
|
||||
for (unsigned i = 0; i < tmask.size(); ++i) D_RAW(tmask[i] << ' ');
|
||||
D_RAW(endl);
|
||||
D_RAW(endl);
|
||||
D_RAW(endl);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -57,8 +57,13 @@ namespace Harp {
|
|||
std::vector<bool> &tm, Word pc
|
||||
): pc(pc), fallThrough(false), uni(false)
|
||||
{
|
||||
std::cout << "DomStackEntry TMASK: ";
|
||||
for (unsigned i = 0; i < m.size(); ++i)
|
||||
{
|
||||
std::cout << " " << (!bool(m[i][p]) && tm[i]);
|
||||
tmask.push_back(!bool(m[i][p]) && tm[i]);
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
DomStackEntry(const std::vector<bool> &tmask):
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#ifndef __DEBUG_H
|
||||
#define __DEBUG_H
|
||||
|
||||
// #define USE_DEBUG 9
|
||||
#define USE_DEBUG 9
|
||||
|
||||
#ifdef USE_DEBUG
|
||||
#include <iostream>
|
||||
|
|
|
@ -28,6 +28,7 @@ namespace Harp {
|
|||
SYS_INST = 115,
|
||||
TRAP = 0x7f,
|
||||
FENCE = 0x0f,
|
||||
PJ_INST = 0x7b,
|
||||
GPGPU = 0x6b
|
||||
};
|
||||
|
||||
|
@ -56,6 +57,7 @@ namespace Harp {
|
|||
{Opcode::SYS_INST, {"SYS" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::TRAP, {"TRAP" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::FENCE, {"fence" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::PJ_INST, {"pred j", true , false, false, false, InstType::R_TYPE }},
|
||||
{Opcode::GPGPU, {"gpgpu" , false, false, false, false, InstType::R_TYPE }}
|
||||
};
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ ostream &Harp::operator<<(ostream& os, Instruction &inst) {
|
|||
return os;
|
||||
}
|
||||
|
||||
bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<bool> > >& m,
|
||||
bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<Word> > >& m,
|
||||
const std::vector<bool> &tm) {
|
||||
bool same;
|
||||
unsigned i;
|
||||
|
@ -57,15 +57,19 @@ bool checkUnanimous(unsigned p, const std::vector<std::vector<Reg<bool> > >& m,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (i == m.size())
|
||||
throw DivergentBranchException();
|
||||
if (i == m.size()) throw DivergentBranchException();
|
||||
|
||||
std::cout << "same: " << same << " with -> ";
|
||||
for (; i < m.size(); ++i) {
|
||||
if (tm[i]) {
|
||||
std::cout << " " << (bool(m[i][p]));
|
||||
if (same != (bool(m[i][p]))) {
|
||||
std::cout << " FALSE\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << " TRUE\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -122,11 +126,20 @@ void Instruction::executeOn(Warp &c) {
|
|||
vector<Reg<bool> > &pReg(c.pred[t]);
|
||||
stack<DomStackEntry> &domStack(c.domStack);
|
||||
|
||||
std::cout << std::hex << "opcode: " << op << " func3: " << func3 << "\n";
|
||||
if (op == GPGPU) std::cout << "OPCODE MATCHED GPGPU\n";
|
||||
|
||||
// If this thread is masked out, don't execute the instruction, unless it's
|
||||
// a split or join.
|
||||
// if (((predicated && !pReg[pred]) || !c.tmask[t]) &&
|
||||
// op != SPLIT && op != JOIN) continue;
|
||||
|
||||
predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2));
|
||||
bool split = (op == GPGPU) && (func3 == 2);
|
||||
bool join = (op == GPGPU) && (func3 == 3);
|
||||
|
||||
if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join) continue;
|
||||
|
||||
++c.insts;
|
||||
|
||||
Word memAddr;
|
||||
|
@ -134,13 +147,14 @@ void Instruction::executeOn(Warp &c) {
|
|||
Word shamt;
|
||||
Word temp;
|
||||
Word data_read;
|
||||
// Word pred;
|
||||
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
|
||||
int op1, op2;
|
||||
switch (op) {
|
||||
|
||||
case NOP: break;
|
||||
case NOP:
|
||||
std::cout << "NOP_INST\n";
|
||||
break;
|
||||
case R_INST:
|
||||
std::cout << "R_INST\n";
|
||||
switch (func3)
|
||||
{
|
||||
case 0:
|
||||
|
@ -207,7 +221,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
break;
|
||||
|
||||
case L_INST:
|
||||
|
||||
std::cout << "L_INST\n";
|
||||
memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC);
|
||||
shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8;
|
||||
data_read = c.core->mem.read(memAddr, c.supervisorMode);
|
||||
|
@ -245,6 +259,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
}
|
||||
break;
|
||||
case I_INST:
|
||||
std::cout << "I_INST\n";
|
||||
switch (func3)
|
||||
{
|
||||
|
||||
|
@ -330,6 +345,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
}
|
||||
break;
|
||||
case S_INST:
|
||||
std::cout << "S_INST\n";
|
||||
++c.stores;
|
||||
memAddr = reg[rsrc[0]] + immsrc;
|
||||
// std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
|
||||
|
@ -359,6 +375,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
#endif
|
||||
break;
|
||||
case B_INST:
|
||||
std::cout << "B_INST\n";
|
||||
switch (func3)
|
||||
{
|
||||
case 0:
|
||||
|
@ -412,13 +429,17 @@ void Instruction::executeOn(Warp &c) {
|
|||
}
|
||||
break;
|
||||
case LUI_INST:
|
||||
std::cout << "LUI_INST\n";
|
||||
reg[rdest] = (immsrc << 12) & 0xfffff000;
|
||||
break;
|
||||
case AUIPC_INST:
|
||||
std::cout << "AUIPC_INST\n";
|
||||
reg[rdest] = ((immsrc << 12) & 0xfffff000) + (c.pc - 4);
|
||||
break;
|
||||
case JAL_INST:
|
||||
std::cout << "JAL_INST\n";
|
||||
if (!pcSet) nextPc = (c.pc - 4) + immsrc;
|
||||
if (!pcSet) std::cout << "JAL... SETTING PC: " << nextPc << "\n";
|
||||
if (rdest != 0)
|
||||
{
|
||||
reg[rdest] = c.pc;
|
||||
|
@ -426,8 +447,9 @@ void Instruction::executeOn(Warp &c) {
|
|||
pcSet = true;
|
||||
break;
|
||||
case JALR_INST:
|
||||
std::cout << "JALR_INST\n";
|
||||
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
|
||||
|
||||
if (!pcSet) std::cout << "JALR... SETTING PC: " << nextPc << "\n";
|
||||
if (rdest != 0)
|
||||
{
|
||||
reg[rdest] = c.pc;
|
||||
|
@ -435,6 +457,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
pcSet = true;
|
||||
break;
|
||||
case SYS_INST:
|
||||
std::cout << "SYS_INST\n";
|
||||
temp = reg[rsrc[0]];
|
||||
switch (func3)
|
||||
{
|
||||
|
@ -505,65 +528,107 @@ void Instruction::executeOn(Warp &c) {
|
|||
c.interrupt(0);
|
||||
break;
|
||||
case FENCE:
|
||||
std::cout << "FENCE_INST\n";
|
||||
break;
|
||||
case PJ_INST:
|
||||
// pred jump reg
|
||||
std::cout << "pred jump... src: " << rsrc[0] << std::hex << " val: " << reg[rsrc[0]] << " dest: " << reg[rsrc[1]] << "\n";
|
||||
if (reg[rsrc[0]])
|
||||
{
|
||||
if (!pcSet) nextPc = reg[rsrc[1]];
|
||||
pcSet = true;
|
||||
}
|
||||
break;
|
||||
case GPGPU:
|
||||
std::cout << "GPGPU\n";
|
||||
switch(func3)
|
||||
{
|
||||
case 0:
|
||||
// WSPAWN
|
||||
D(0, "Spawning a new warp.");
|
||||
// std::cout << "SIZE: " << c.core->w.size() << "\n";
|
||||
for (unsigned i = 0; i < c.core->w.size(); ++i)
|
||||
std::cout << "WSPAWN\n";
|
||||
if (sjOnce)
|
||||
{
|
||||
// std::cout << "WHATTT\n";
|
||||
Warp &newWarp(c.core->w[i]);
|
||||
// std::cout << "STARTING\n";
|
||||
if (newWarp.spawned == false) {
|
||||
// std::cout << "ABOUT TO START\n";
|
||||
newWarp.pc = reg[rsrc[0]];
|
||||
newWarp.reg[0] = reg;
|
||||
newWarp.csr = c.csr;
|
||||
newWarp.activeThreads = 1;
|
||||
newWarp.supervisorMode = false;
|
||||
newWarp.spawned = true;
|
||||
break;
|
||||
sjOnce = false;
|
||||
D(0, "Spawning a new warp.");
|
||||
// std::cout << "SIZE: " << c.core->w.size() << "\n";
|
||||
for (unsigned i = 0; i < c.core->w.size(); ++i)
|
||||
{
|
||||
// std::cout << "WHATTT\n";
|
||||
Warp &newWarp(c.core->w[i]);
|
||||
// std::cout << "STARTING\n";
|
||||
if (newWarp.spawned == false) {
|
||||
// std::cout << "ABOUT TO START\n";
|
||||
newWarp.pc = reg[rsrc[0]];
|
||||
newWarp.reg[0] = reg;
|
||||
newWarp.csr = c.csr;
|
||||
newWarp.activeThreads = 1;
|
||||
newWarp.supervisorMode = false;
|
||||
newWarp.spawned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
// SPLIT
|
||||
c.domStack.push(c.tmask);
|
||||
c.domStack.push(e);
|
||||
|
||||
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||
std::cout << "SPLIT\n";
|
||||
if (sjOnce)
|
||||
{
|
||||
c.tmask[i] = !e.tmask[i] && c.tmask[i];
|
||||
sjOnce = false;
|
||||
if (checkUnanimous(pred, c.reg, c.tmask)) {
|
||||
std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
|
||||
DomStackEntry e(c.tmask);
|
||||
e.uni = true;
|
||||
c.domStack.push(e);
|
||||
break;
|
||||
}
|
||||
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
|
||||
c.domStack.push(c.tmask);
|
||||
c.domStack.push(e);
|
||||
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||
{
|
||||
c.tmask[i] = !e.tmask[i] && c.tmask[i];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// JOIN
|
||||
if (!c.domStack.top().fallThrough) {
|
||||
if (!pcSet) nextPc = c.domStack.top().pc;
|
||||
pcSet = true;
|
||||
std::cout << "JOIN\n";
|
||||
if (sjOnce)
|
||||
{
|
||||
sjOnce = false;
|
||||
if (!c.domStack.empty() && c.domStack.top().uni) {
|
||||
D(2, "Uni branch at join");
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
break;
|
||||
}
|
||||
if (!c.domStack.top().fallThrough) {
|
||||
if (!pcSet) nextPc = c.domStack.top().pc;
|
||||
pcSet = true;
|
||||
}
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
}
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
break;
|
||||
case 4:
|
||||
// JMPRT
|
||||
std::cout << "JMPRT\n";
|
||||
nextActiveThreads = 1;
|
||||
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||
pcSet = true;
|
||||
break;
|
||||
case 5:
|
||||
// CLONE
|
||||
// std::cout << "CLONE\n";
|
||||
std::cout << "CLONE\n";
|
||||
// std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
|
||||
c.reg[reg[rsrc[0]]] = reg;
|
||||
break;
|
||||
case 6:
|
||||
// JALRS
|
||||
std::cout << "JALRS\n";
|
||||
nextActiveThreads = reg[rsrc[1]];
|
||||
reg[rdest] = c.pc;
|
||||
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||
|
@ -571,14 +636,6 @@ void Instruction::executeOn(Warp &c) {
|
|||
// std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
|
||||
// std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
|
||||
break;
|
||||
case 7:
|
||||
// pred jump reg
|
||||
if (reg[rsrc[0]])
|
||||
{
|
||||
nextPc = reg[rsrc[1]];
|
||||
pcSet = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
|
||||
}
|
||||
|
@ -593,6 +650,20 @@ void Instruction::executeOn(Warp &c) {
|
|||
|
||||
c.activeThreads = nextActiveThreads;
|
||||
|
||||
// if (nextActiveThreads != 0)
|
||||
// {
|
||||
// for (int i = 7; i >= c.activeThreads; i--)
|
||||
// {
|
||||
// c.tmask[i] = c.tmask[i] && false;
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
|
||||
// std::cout << "new thread mask: ";
|
||||
// for (int i = 0; i < c.tmask.size(); ++i) std::cout << " " << c.tmask[i];
|
||||
// std::cout << "\n";
|
||||
|
||||
// This way, if pc was set by a side effect (such as interrupt), it will
|
||||
// retain its new value.
|
||||
if (pcSet) c.pc = nextPc;
|
||||
|
|
297673
src/results.txt
297673
src/results.txt
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,5 @@
|
|||
// #include <stdint.h>
|
||||
// #include <stdbool.h>
|
||||
#include <stdbool.h>
|
||||
// #include <cstdint>
|
||||
|
||||
|
||||
|
@ -21,8 +21,8 @@ unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|||
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
#define NUM_WARPS 16
|
||||
#define NUM_THREADS 1
|
||||
#define NUM_WARPS 3
|
||||
#define NUM_THREADS 7
|
||||
|
||||
int main()
|
||||
{
|
||||
|
@ -44,20 +44,13 @@ void matAddition(unsigned tid, unsigned wid)
|
|||
|
||||
unsigned i = (wid * NUM_THREADS) + tid;
|
||||
|
||||
// int cond = i < 16;
|
||||
// __if(cond)
|
||||
__if((i < 10))
|
||||
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
||||
__else
|
||||
__end_if
|
||||
|
||||
// // DO SOMETHING
|
||||
|
||||
// __else
|
||||
|
||||
// // DO SOMETHING ELSE
|
||||
|
||||
// __end_if
|
||||
|
||||
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
||||
|
||||
sleep((100 * wid)+100);
|
||||
sleep((50 * (wid + wid))+100);
|
||||
|
||||
return;
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ Disassembly of section .text:
|
|||
|
||||
80000000 <_start>:
|
||||
80000000: 7ffff137 lui sp,0x7ffff
|
||||
80000004: 18c000ef jal ra,80000190 <queue_initialize>
|
||||
80000004: 1d4000ef jal ra,800001d8 <queue_initialize>
|
||||
80000008: 068000ef jal ra,80000070 <main>
|
||||
8000000c: 00000073 ecall
|
||||
|
||||
|
@ -39,7 +39,7 @@ Disassembly of section .text:
|
|||
8000004c: 00038d93 mv s11,t2
|
||||
80000050: 01bfe0eb 0x1bfe0eb
|
||||
80000054: 00000517 auipc a0,0x0
|
||||
80000058: 2a850513 addi a0,a0,680 # 800002fc <reschedule_warps>
|
||||
80000058: 2f050513 addi a0,a0,752 # 80000344 <reschedule_warps>
|
||||
8000005c: 0005406b 0x5406b
|
||||
|
||||
80000060 <wspawn>:
|
||||
|
@ -61,11 +61,11 @@ Disassembly of section .text:
|
|||
80000094: 5cc68693 addi a3,a3,1484 # 810005cc <y+0xffffffc0>
|
||||
80000098: 80000637 lui a2,0x80000
|
||||
8000009c: 0d460613 addi a2,a2,212 # 800000d4 <y+0xfefffac8>
|
||||
800000a0: 00100593 li a1,1
|
||||
800000a4: 01000513 li a0,16
|
||||
800000a8: 390000ef jal ra,80000438 <createWarps>
|
||||
800000a0: 00700593 li a1,7
|
||||
800000a4: 00300513 li a0,3
|
||||
800000a8: 3d8000ef jal ra,80000480 <createWarps>
|
||||
800000ac: 00000013 nop
|
||||
800000b0: 214000ef jal ra,800002c4 <queue_isEmpty>
|
||||
800000b0: 25c000ef jal ra,8000030c <queue_isEmpty>
|
||||
800000b4: 00050793 mv a5,a0
|
||||
800000b8: fe078ce3 beqz a5,800000b0 <main+0x40>
|
||||
800000bc: 00000793 li a5,0
|
||||
|
@ -76,334 +76,352 @@ Disassembly of section .text:
|
|||
800000d0: 00008067 ret
|
||||
|
||||
800000d4 <matAddition>:
|
||||
800000d4: fd010113 addi sp,sp,-48
|
||||
800000d8: 02112623 sw ra,44(sp)
|
||||
800000dc: 02812423 sw s0,40(sp)
|
||||
800000e0: 03010413 addi s0,sp,48
|
||||
800000e4: fca42e23 sw a0,-36(s0)
|
||||
800000e8: fcb42c23 sw a1,-40(s0)
|
||||
800000ec: 408000ef jal ra,800004f4 <get_1st_arg>
|
||||
800000d4: fc010113 addi sp,sp,-64
|
||||
800000d8: 02112e23 sw ra,60(sp)
|
||||
800000dc: 02812c23 sw s0,56(sp)
|
||||
800000e0: 04010413 addi s0,sp,64
|
||||
800000e4: fca42623 sw a0,-52(s0)
|
||||
800000e8: fcb42423 sw a1,-56(s0)
|
||||
800000ec: 450000ef jal ra,8000053c <get_1st_arg>
|
||||
800000f0: fea42623 sw a0,-20(s0)
|
||||
800000f4: 428000ef jal ra,8000051c <get_2nd_arg>
|
||||
800000f4: 470000ef jal ra,80000564 <get_2nd_arg>
|
||||
800000f8: fea42423 sw a0,-24(s0)
|
||||
800000fc: 448000ef jal ra,80000544 <get_3rd_arg>
|
||||
800000fc: 490000ef jal ra,8000058c <get_3rd_arg>
|
||||
80000100: fea42223 sw a0,-28(s0)
|
||||
80000104: fd842703 lw a4,-40(s0)
|
||||
80000108: fdc42783 lw a5,-36(s0)
|
||||
8000010c: 00f707b3 add a5,a4,a5
|
||||
80000110: fef42023 sw a5,-32(s0)
|
||||
80000114: fe042783 lw a5,-32(s0)
|
||||
80000118: 00279793 slli a5,a5,0x2
|
||||
8000011c: fec42703 lw a4,-20(s0)
|
||||
80000120: 00f707b3 add a5,a4,a5
|
||||
80000124: 0007a683 lw a3,0(a5) # 81000000 <y+0xfffff9f4>
|
||||
80000128: fe042783 lw a5,-32(s0)
|
||||
8000012c: 00279793 slli a5,a5,0x2
|
||||
80000130: fe842703 lw a4,-24(s0)
|
||||
80000134: 00f707b3 add a5,a4,a5
|
||||
80000138: 0007a703 lw a4,0(a5)
|
||||
8000013c: fe042783 lw a5,-32(s0)
|
||||
80000140: 00279793 slli a5,a5,0x2
|
||||
80000144: fe442603 lw a2,-28(s0)
|
||||
80000148: 00f607b3 add a5,a2,a5
|
||||
8000014c: 00e68733 add a4,a3,a4
|
||||
80000150: 00e7a023 sw a4,0(a5)
|
||||
80000154: fd842783 lw a5,-40(s0)
|
||||
80000158: 00178713 addi a4,a5,1
|
||||
8000015c: 00070793 mv a5,a4
|
||||
80000160: 00179793 slli a5,a5,0x1
|
||||
80000164: 00e787b3 add a5,a5,a4
|
||||
80000168: 00379793 slli a5,a5,0x3
|
||||
8000016c: 00e787b3 add a5,a5,a4
|
||||
80000170: 00279793 slli a5,a5,0x2
|
||||
80000174: 00078513 mv a0,a5
|
||||
80000178: 280000ef jal ra,800003f8 <sleep>
|
||||
8000017c: 00000013 nop
|
||||
80000180: 02c12083 lw ra,44(sp)
|
||||
80000184: 02812403 lw s0,40(sp)
|
||||
80000188: 03010113 addi sp,sp,48
|
||||
8000018c: 00008067 ret
|
||||
80000104: fc842703 lw a4,-56(s0)
|
||||
80000108: 00070793 mv a5,a4
|
||||
8000010c: 00379793 slli a5,a5,0x3
|
||||
80000110: 40e787b3 sub a5,a5,a4
|
||||
80000114: fcc42703 lw a4,-52(s0)
|
||||
80000118: 00f707b3 add a5,a4,a5
|
||||
8000011c: fef42023 sw a5,-32(s0)
|
||||
80000120: fe042783 lw a5,-32(s0)
|
||||
80000124: 00a7b793 sltiu a5,a5,10
|
||||
80000128: 0017c793 xori a5,a5,1
|
||||
8000012c: fcf40fa3 sb a5,-33(s0)
|
||||
80000130: fdf44783 lbu a5,-33(s0)
|
||||
80000134: 00078f13 mv t5,a5
|
||||
80000138: 800007b7 lui a5,0x80000
|
||||
8000013c: 19478f93 addi t6,a5,404 # 80000194 <y+0xfefffb88>
|
||||
80000140: 000f206b 0xf206b
|
||||
80000144: 01ff707b 0x1ff707b
|
||||
80000148: fe042783 lw a5,-32(s0)
|
||||
8000014c: 00279793 slli a5,a5,0x2
|
||||
80000150: fec42703 lw a4,-20(s0)
|
||||
80000154: 00f707b3 add a5,a4,a5
|
||||
80000158: 0007a683 lw a3,0(a5)
|
||||
8000015c: fe042783 lw a5,-32(s0)
|
||||
80000160: 00279793 slli a5,a5,0x2
|
||||
80000164: fe842703 lw a4,-24(s0)
|
||||
80000168: 00f707b3 add a5,a4,a5
|
||||
8000016c: 0007a703 lw a4,0(a5)
|
||||
80000170: fe042783 lw a5,-32(s0)
|
||||
80000174: 00279793 slli a5,a5,0x2
|
||||
80000178: fe442603 lw a2,-28(s0)
|
||||
8000017c: 00f607b3 add a5,a2,a5
|
||||
80000180: 00e68733 add a4,a3,a4
|
||||
80000184: 00e7a023 sw a4,0(a5)
|
||||
80000188: 800007b7 lui a5,0x80000
|
||||
8000018c: 19878e13 addi t3,a5,408 # 80000198 <y+0xfefffb8c>
|
||||
80000190: 000e0067 jr t3
|
||||
80000194: 00000013 nop
|
||||
80000198: 0000306b 0x306b
|
||||
8000019c: fc842783 lw a5,-56(s0)
|
||||
800001a0: 00178713 addi a4,a5,1
|
||||
800001a4: 00070793 mv a5,a4
|
||||
800001a8: 00179793 slli a5,a5,0x1
|
||||
800001ac: 00e787b3 add a5,a5,a4
|
||||
800001b0: 00379793 slli a5,a5,0x3
|
||||
800001b4: 00e787b3 add a5,a5,a4
|
||||
800001b8: 00279793 slli a5,a5,0x2
|
||||
800001bc: 00078513 mv a0,a5
|
||||
800001c0: 280000ef jal ra,80000440 <sleep>
|
||||
800001c4: 00000013 nop
|
||||
800001c8: 03c12083 lw ra,60(sp)
|
||||
800001cc: 03812403 lw s0,56(sp)
|
||||
800001d0: 04010113 addi sp,sp,64
|
||||
800001d4: 00008067 ret
|
||||
|
||||
80000190 <queue_initialize>:
|
||||
80000190: 01000297 auipc t0,0x1000
|
||||
80000194: eb028293 addi t0,t0,-336 # 81000040 <q>
|
||||
80000198: 00000313 li t1,0
|
||||
8000019c: 00700393 li t2,7
|
||||
800001a0: 0062a023 sw t1,0(t0)
|
||||
800001a4: 0062a223 sw t1,4(t0)
|
||||
800001a8: 0062a423 sw t1,8(t0)
|
||||
800001ac: 0072a623 sw t2,12(t0)
|
||||
800001b0: 0062a823 sw t1,16(t0)
|
||||
800001b4: 00008067 ret
|
||||
800001d8 <queue_initialize>:
|
||||
800001d8: 01000297 auipc t0,0x1000
|
||||
800001dc: e6828293 addi t0,t0,-408 # 81000040 <q>
|
||||
800001e0: 00000313 li t1,0
|
||||
800001e4: 00700393 li t2,7
|
||||
800001e8: 0062a023 sw t1,0(t0)
|
||||
800001ec: 0062a223 sw t1,4(t0)
|
||||
800001f0: 0062a423 sw t1,8(t0)
|
||||
800001f4: 0072a623 sw t2,12(t0)
|
||||
800001f8: 0062a823 sw t1,16(t0)
|
||||
800001fc: 00008067 ret
|
||||
|
||||
800001b8 <queue_enqueue>:
|
||||
800001b8: 01000297 auipc t0,0x1000
|
||||
800001bc: e8828293 addi t0,t0,-376 # 81000040 <q>
|
||||
800001c0: 0082a303 lw t1,8(t0)
|
||||
800001c4: 00130313 addi t1,t1,1
|
||||
800001c8: 0062a423 sw t1,8(t0)
|
||||
800001cc: 01428313 addi t1,t0,20
|
||||
800001d0: 0042ae83 lw t4,4(t0)
|
||||
800001d4: 005e9393 slli t2,t4,0x5
|
||||
800001d8: 00730333 add t1,t1,t2
|
||||
800001dc: 00052e03 lw t3,0(a0)
|
||||
800001e0: 01c32023 sw t3,0(t1)
|
||||
800001e4: 00452e03 lw t3,4(a0)
|
||||
800001e8: 01c32223 sw t3,4(t1)
|
||||
800001ec: 00852e03 lw t3,8(a0)
|
||||
800001f0: 01c32423 sw t3,8(t1)
|
||||
800001f4: 00c52e03 lw t3,12(a0)
|
||||
800001f8: 01c32623 sw t3,12(t1)
|
||||
800001fc: 01052e03 lw t3,16(a0)
|
||||
80000200: 01c32823 sw t3,16(t1)
|
||||
80000204: 01452e03 lw t3,20(a0)
|
||||
80000208: 01c32a23 sw t3,20(t1)
|
||||
8000020c: 01852e03 lw t3,24(a0)
|
||||
80000210: 01c32c23 sw t3,24(t1)
|
||||
80000214: 001e8e93 addi t4,t4,1
|
||||
80000218: 03200f13 li t5,50
|
||||
8000021c: 01ee9463 bne t4,t5,80000224 <ec>
|
||||
80000220: 00000e93 li t4,0
|
||||
80000200 <queue_enqueue>:
|
||||
80000200: 01000297 auipc t0,0x1000
|
||||
80000204: e4028293 addi t0,t0,-448 # 81000040 <q>
|
||||
80000208: 0082a303 lw t1,8(t0)
|
||||
8000020c: 00130313 addi t1,t1,1
|
||||
80000210: 0062a423 sw t1,8(t0)
|
||||
80000214: 01428313 addi t1,t0,20
|
||||
80000218: 0042ae83 lw t4,4(t0)
|
||||
8000021c: 005e9393 slli t2,t4,0x5
|
||||
80000220: 00730333 add t1,t1,t2
|
||||
80000224: 00052e03 lw t3,0(a0)
|
||||
80000228: 01c32023 sw t3,0(t1)
|
||||
8000022c: 00452e03 lw t3,4(a0)
|
||||
80000230: 01c32223 sw t3,4(t1)
|
||||
80000234: 00852e03 lw t3,8(a0)
|
||||
80000238: 01c32423 sw t3,8(t1)
|
||||
8000023c: 00c52e03 lw t3,12(a0)
|
||||
80000240: 01c32623 sw t3,12(t1)
|
||||
80000244: 01052e03 lw t3,16(a0)
|
||||
80000248: 01c32823 sw t3,16(t1)
|
||||
8000024c: 01452e03 lw t3,20(a0)
|
||||
80000250: 01c32a23 sw t3,20(t1)
|
||||
80000254: 01852e03 lw t3,24(a0)
|
||||
80000258: 01c32c23 sw t3,24(t1)
|
||||
8000025c: 001e8e93 addi t4,t4,1
|
||||
80000260: 03200f13 li t5,50
|
||||
80000264: 01ee9463 bne t4,t5,8000026c <ec>
|
||||
80000268: 00000e93 li t4,0
|
||||
|
||||
80000224 <ec>:
|
||||
80000224: 01d2a223 sw t4,4(t0)
|
||||
80000228: 00008067 ret
|
||||
8000026c <ec>:
|
||||
8000026c: 01d2a223 sw t4,4(t0)
|
||||
80000270: 00008067 ret
|
||||
|
||||
8000022c <queue_dequeue>:
|
||||
8000022c: 01000297 auipc t0,0x1000
|
||||
80000230: e1428293 addi t0,t0,-492 # 81000040 <q>
|
||||
80000234: 0082a303 lw t1,8(t0)
|
||||
80000238: fff30313 addi t1,t1,-1
|
||||
8000023c: 0062a423 sw t1,8(t0)
|
||||
80000240: 01428313 addi t1,t0,20
|
||||
80000244: 0002ae83 lw t4,0(t0)
|
||||
80000248: 03200f93 li t6,50
|
||||
8000024c: 000e8f13 mv t5,t4
|
||||
80000250: 001f0f13 addi t5,t5,1
|
||||
80000254: 01ff1463 bne t5,t6,8000025c <dc>
|
||||
80000258: 00000f13 li t5,0
|
||||
80000274 <queue_dequeue>:
|
||||
80000274: 01000297 auipc t0,0x1000
|
||||
80000278: dcc28293 addi t0,t0,-564 # 81000040 <q>
|
||||
8000027c: 0082a303 lw t1,8(t0)
|
||||
80000280: fff30313 addi t1,t1,-1
|
||||
80000284: 0062a423 sw t1,8(t0)
|
||||
80000288: 01428313 addi t1,t0,20
|
||||
8000028c: 0002ae83 lw t4,0(t0)
|
||||
80000290: 03200f93 li t6,50
|
||||
80000294: 000e8f13 mv t5,t4
|
||||
80000298: 001f0f13 addi t5,t5,1
|
||||
8000029c: 01ff1463 bne t5,t6,800002a4 <dc>
|
||||
800002a0: 00000f13 li t5,0
|
||||
|
||||
8000025c <dc>:
|
||||
8000025c: 01e2a023 sw t5,0(t0)
|
||||
80000260: 005e9393 slli t2,t4,0x5
|
||||
80000264: 00730333 add t1,t1,t2
|
||||
80000268: 00032e03 lw t3,0(t1)
|
||||
8000026c: 01c52023 sw t3,0(a0)
|
||||
80000270: 00432e03 lw t3,4(t1)
|
||||
80000274: 01c52223 sw t3,4(a0)
|
||||
80000278: 00832e03 lw t3,8(t1)
|
||||
8000027c: 01c52423 sw t3,8(a0)
|
||||
80000280: 00c32e03 lw t3,12(t1)
|
||||
80000284: 01c52623 sw t3,12(a0)
|
||||
80000288: 01032e03 lw t3,16(t1)
|
||||
8000028c: 01c52823 sw t3,16(a0)
|
||||
80000290: 01432e03 lw t3,20(t1)
|
||||
80000294: 01c52a23 sw t3,20(a0)
|
||||
80000298: 01832e03 lw t3,24(t1)
|
||||
8000029c: 01c52c23 sw t3,24(a0)
|
||||
800002a0: 00008067 ret
|
||||
800002a4 <dc>:
|
||||
800002a4: 01e2a023 sw t5,0(t0)
|
||||
800002a8: 005e9393 slli t2,t4,0x5
|
||||
800002ac: 00730333 add t1,t1,t2
|
||||
800002b0: 00032e03 lw t3,0(t1)
|
||||
800002b4: 01c52023 sw t3,0(a0)
|
||||
800002b8: 00432e03 lw t3,4(t1)
|
||||
800002bc: 01c52223 sw t3,4(a0)
|
||||
800002c0: 00832e03 lw t3,8(t1)
|
||||
800002c4: 01c52423 sw t3,8(a0)
|
||||
800002c8: 00c32e03 lw t3,12(t1)
|
||||
800002cc: 01c52623 sw t3,12(a0)
|
||||
800002d0: 01032e03 lw t3,16(t1)
|
||||
800002d4: 01c52823 sw t3,16(a0)
|
||||
800002d8: 01432e03 lw t3,20(t1)
|
||||
800002dc: 01c52a23 sw t3,20(a0)
|
||||
800002e0: 01832e03 lw t3,24(t1)
|
||||
800002e4: 01c52c23 sw t3,24(a0)
|
||||
800002e8: 00008067 ret
|
||||
|
||||
800002a4 <queue_isFull>:
|
||||
800002a4: 01000297 auipc t0,0x1000
|
||||
800002a8: d9c28293 addi t0,t0,-612 # 81000040 <q>
|
||||
800002ac: 0082a303 lw t1,8(t0)
|
||||
800002b0: 00000513 li a0,0
|
||||
800002b4: 03200e13 li t3,50
|
||||
800002b8: 006e1463 bne t3,t1,800002c0 <qf>
|
||||
800002bc: 00150513 addi a0,a0,1
|
||||
800002ec <queue_isFull>:
|
||||
800002ec: 01000297 auipc t0,0x1000
|
||||
800002f0: d5428293 addi t0,t0,-684 # 81000040 <q>
|
||||
800002f4: 0082a303 lw t1,8(t0)
|
||||
800002f8: 00000513 li a0,0
|
||||
800002fc: 03200e13 li t3,50
|
||||
80000300: 006e1463 bne t3,t1,80000308 <qf>
|
||||
80000304: 00150513 addi a0,a0,1
|
||||
|
||||
800002c0 <qf>:
|
||||
800002c0: 00008067 ret
|
||||
80000308 <qf>:
|
||||
80000308: 00008067 ret
|
||||
|
||||
800002c4 <queue_isEmpty>:
|
||||
800002c4: 01000297 auipc t0,0x1000
|
||||
800002c8: d7c28293 addi t0,t0,-644 # 81000040 <q>
|
||||
800002cc: 0082a303 lw t1,8(t0)
|
||||
800002d0: 00000513 li a0,0
|
||||
800002d4: 00000e13 li t3,0
|
||||
800002d8: 006e1463 bne t3,t1,800002e0 <qe>
|
||||
800002dc: 00150513 addi a0,a0,1
|
||||
8000030c <queue_isEmpty>:
|
||||
8000030c: 01000297 auipc t0,0x1000
|
||||
80000310: d3428293 addi t0,t0,-716 # 81000040 <q>
|
||||
80000314: 0082a303 lw t1,8(t0)
|
||||
80000318: 00000513 li a0,0
|
||||
8000031c: 00000e13 li t3,0
|
||||
80000320: 006e1463 bne t3,t1,80000328 <qe>
|
||||
80000324: 00150513 addi a0,a0,1
|
||||
|
||||
800002e0 <qe>:
|
||||
800002e0: 00008067 ret
|
||||
80000328 <qe>:
|
||||
80000328: 00008067 ret
|
||||
|
||||
800002e4 <queue_availableWarps>:
|
||||
800002e4: 01000297 auipc t0,0x1000
|
||||
800002e8: d5c28293 addi t0,t0,-676 # 81000040 <q>
|
||||
800002ec: 00c2a303 lw t1,12(t0)
|
||||
800002f0: 0102a383 lw t2,16(t0)
|
||||
800002f4: 0063b533 sltu a0,t2,t1
|
||||
800002f8: 00008067 ret
|
||||
8000032c <queue_availableWarps>:
|
||||
8000032c: 01000297 auipc t0,0x1000
|
||||
80000330: d1428293 addi t0,t0,-748 # 81000040 <q>
|
||||
80000334: 00c2a303 lw t1,12(t0)
|
||||
80000338: 0102a383 lw t2,16(t0)
|
||||
8000033c: 0063b533 sltu a0,t2,t1
|
||||
80000340: 00008067 ret
|
||||
|
||||
800002fc <reschedule_warps>:
|
||||
800002fc: fd010113 addi sp,sp,-48
|
||||
80000300: 02112623 sw ra,44(sp)
|
||||
80000304: 02812423 sw s0,40(sp)
|
||||
80000308: 03010413 addi s0,sp,48
|
||||
8000030c: fb9ff0ef jal ra,800002c4 <queue_isEmpty>
|
||||
80000310: 00050793 mv a5,a0
|
||||
80000314: 00078463 beqz a5,8000031c <reschedule_warps+0x20>
|
||||
80000318: 00000073 ecall
|
||||
8000031c: fd440793 addi a5,s0,-44
|
||||
80000320: 00078513 mv a0,a5
|
||||
80000324: f09ff0ef jal ra,8000022c <queue_dequeue>
|
||||
80000328: fdc42783 lw a5,-36(s0)
|
||||
8000032c: 00078113 mv sp,a5
|
||||
80000330: fd842503 lw a0,-40(s0)
|
||||
80000334: fd442583 lw a1,-44(s0)
|
||||
80000338: fe042603 lw a2,-32(s0)
|
||||
8000033c: fe442683 lw a3,-28(s0)
|
||||
80000340: fe842703 lw a4,-24(s0)
|
||||
80000344: fec42783 lw a5,-20(s0)
|
||||
80000348: cc9ff0ef jal ra,80000010 <createThreads>
|
||||
8000034c: 00000073 ecall
|
||||
80000350: 00000013 nop
|
||||
80000354: 02c12083 lw ra,44(sp)
|
||||
80000358: 02812403 lw s0,40(sp)
|
||||
8000035c: 03010113 addi sp,sp,48
|
||||
80000360: 00008067 ret
|
||||
80000344 <reschedule_warps>:
|
||||
80000344: fd010113 addi sp,sp,-48
|
||||
80000348: 02112623 sw ra,44(sp)
|
||||
8000034c: 02812423 sw s0,40(sp)
|
||||
80000350: 03010413 addi s0,sp,48
|
||||
80000354: fb9ff0ef jal ra,8000030c <queue_isEmpty>
|
||||
80000358: 00050793 mv a5,a0
|
||||
8000035c: 00078463 beqz a5,80000364 <reschedule_warps+0x20>
|
||||
80000360: 00000073 ecall
|
||||
80000364: fd440793 addi a5,s0,-44
|
||||
80000368: 00078513 mv a0,a5
|
||||
8000036c: f09ff0ef jal ra,80000274 <queue_dequeue>
|
||||
80000370: fdc42783 lw a5,-36(s0)
|
||||
80000374: 00078113 mv sp,a5
|
||||
80000378: fd842503 lw a0,-40(s0)
|
||||
8000037c: fd442583 lw a1,-44(s0)
|
||||
80000380: fe042603 lw a2,-32(s0)
|
||||
80000384: fe442683 lw a3,-28(s0)
|
||||
80000388: fe842703 lw a4,-24(s0)
|
||||
8000038c: fec42783 lw a5,-20(s0)
|
||||
80000390: c81ff0ef jal ra,80000010 <createThreads>
|
||||
80000394: 00000073 ecall
|
||||
80000398: 00000013 nop
|
||||
8000039c: 02c12083 lw ra,44(sp)
|
||||
800003a0: 02812403 lw s0,40(sp)
|
||||
800003a4: 03010113 addi sp,sp,48
|
||||
800003a8: 00008067 ret
|
||||
|
||||
80000364 <schedule_warps>:
|
||||
80000364: fd010113 addi sp,sp,-48
|
||||
80000368: 02112623 sw ra,44(sp)
|
||||
8000036c: 02812423 sw s0,40(sp)
|
||||
80000370: 03010413 addi s0,sp,48
|
||||
80000374: 00010993 mv s3,sp
|
||||
80000378: 0500006f j 800003c8 <schedule_warps+0x64>
|
||||
8000037c: 810007b7 lui a5,0x81000
|
||||
80000380: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
|
||||
80000384: 0107a783 lw a5,16(a5)
|
||||
80000388: 00178713 addi a4,a5,1
|
||||
8000038c: 810007b7 lui a5,0x81000
|
||||
80000390: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
|
||||
80000394: 00e7a823 sw a4,16(a5)
|
||||
80000398: fd440793 addi a5,s0,-44
|
||||
8000039c: 00078513 mv a0,a5
|
||||
800003a0: e8dff0ef jal ra,8000022c <queue_dequeue>
|
||||
800003a4: fdc42783 lw a5,-36(s0)
|
||||
800003a8: 00078113 mv sp,a5
|
||||
800003ac: fd842503 lw a0,-40(s0)
|
||||
800003b0: fd442583 lw a1,-44(s0)
|
||||
800003b4: fe042603 lw a2,-32(s0)
|
||||
800003b8: fe442683 lw a3,-28(s0)
|
||||
800003bc: fe842703 lw a4,-24(s0)
|
||||
800003c0: fec42783 lw a5,-20(s0)
|
||||
800003c4: c9dff0ef jal ra,80000060 <wspawn>
|
||||
800003c8: efdff0ef jal ra,800002c4 <queue_isEmpty>
|
||||
800003cc: 00050793 mv a5,a0
|
||||
800003d0: 00079863 bnez a5,800003e0 <schedule_warps+0x7c>
|
||||
800003d4: f11ff0ef jal ra,800002e4 <queue_availableWarps>
|
||||
800003d8: 00050793 mv a5,a0
|
||||
800003dc: fa0790e3 bnez a5,8000037c <schedule_warps+0x18>
|
||||
800003e0: 00098113 mv sp,s3
|
||||
800003e4: 00000013 nop
|
||||
800003e8: 02c12083 lw ra,44(sp)
|
||||
800003ec: 02812403 lw s0,40(sp)
|
||||
800003f0: 03010113 addi sp,sp,48
|
||||
800003f4: 00008067 ret
|
||||
800003ac <schedule_warps>:
|
||||
800003ac: fd010113 addi sp,sp,-48
|
||||
800003b0: 02112623 sw ra,44(sp)
|
||||
800003b4: 02812423 sw s0,40(sp)
|
||||
800003b8: 03010413 addi s0,sp,48
|
||||
800003bc: 00010993 mv s3,sp
|
||||
800003c0: 0500006f j 80000410 <schedule_warps+0x64>
|
||||
800003c4: 810007b7 lui a5,0x81000
|
||||
800003c8: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
|
||||
800003cc: 0107a783 lw a5,16(a5)
|
||||
800003d0: 00178713 addi a4,a5,1
|
||||
800003d4: 810007b7 lui a5,0x81000
|
||||
800003d8: 04078793 addi a5,a5,64 # 81000040 <y+0xfffffa34>
|
||||
800003dc: 00e7a823 sw a4,16(a5)
|
||||
800003e0: fd440793 addi a5,s0,-44
|
||||
800003e4: 00078513 mv a0,a5
|
||||
800003e8: e8dff0ef jal ra,80000274 <queue_dequeue>
|
||||
800003ec: fdc42783 lw a5,-36(s0)
|
||||
800003f0: 00078113 mv sp,a5
|
||||
800003f4: fd842503 lw a0,-40(s0)
|
||||
800003f8: fd442583 lw a1,-44(s0)
|
||||
800003fc: fe042603 lw a2,-32(s0)
|
||||
80000400: fe442683 lw a3,-28(s0)
|
||||
80000404: fe842703 lw a4,-24(s0)
|
||||
80000408: fec42783 lw a5,-20(s0)
|
||||
8000040c: c55ff0ef jal ra,80000060 <wspawn>
|
||||
80000410: efdff0ef jal ra,8000030c <queue_isEmpty>
|
||||
80000414: 00050793 mv a5,a0
|
||||
80000418: 00079863 bnez a5,80000428 <schedule_warps+0x7c>
|
||||
8000041c: f11ff0ef jal ra,8000032c <queue_availableWarps>
|
||||
80000420: 00050793 mv a5,a0
|
||||
80000424: fa0790e3 bnez a5,800003c4 <schedule_warps+0x18>
|
||||
80000428: 00098113 mv sp,s3
|
||||
8000042c: 00000013 nop
|
||||
80000430: 02c12083 lw ra,44(sp)
|
||||
80000434: 02812403 lw s0,40(sp)
|
||||
80000438: 03010113 addi sp,sp,48
|
||||
8000043c: 00008067 ret
|
||||
|
||||
800003f8 <sleep>:
|
||||
800003f8: fd010113 addi sp,sp,-48
|
||||
800003fc: 02812623 sw s0,44(sp)
|
||||
80000400: 03010413 addi s0,sp,48
|
||||
80000404: fca42e23 sw a0,-36(s0)
|
||||
80000408: fe042623 sw zero,-20(s0)
|
||||
8000040c: 0100006f j 8000041c <sleep+0x24>
|
||||
80000410: fec42783 lw a5,-20(s0)
|
||||
80000414: 00178793 addi a5,a5,1
|
||||
80000418: fef42623 sw a5,-20(s0)
|
||||
8000041c: fec42703 lw a4,-20(s0)
|
||||
80000420: fdc42783 lw a5,-36(s0)
|
||||
80000424: fef746e3 blt a4,a5,80000410 <sleep+0x18>
|
||||
80000428: 00000013 nop
|
||||
8000042c: 02c12403 lw s0,44(sp)
|
||||
80000430: 03010113 addi sp,sp,48
|
||||
80000434: 00008067 ret
|
||||
80000440 <sleep>:
|
||||
80000440: fd010113 addi sp,sp,-48
|
||||
80000444: 02812623 sw s0,44(sp)
|
||||
80000448: 03010413 addi s0,sp,48
|
||||
8000044c: fca42e23 sw a0,-36(s0)
|
||||
80000450: fe042623 sw zero,-20(s0)
|
||||
80000454: 0100006f j 80000464 <sleep+0x24>
|
||||
80000458: fec42783 lw a5,-20(s0)
|
||||
8000045c: 00178793 addi a5,a5,1
|
||||
80000460: fef42623 sw a5,-20(s0)
|
||||
80000464: fec42703 lw a4,-20(s0)
|
||||
80000468: fdc42783 lw a5,-36(s0)
|
||||
8000046c: fef746e3 blt a4,a5,80000458 <sleep+0x18>
|
||||
80000470: 00000013 nop
|
||||
80000474: 02c12403 lw s0,44(sp)
|
||||
80000478: 03010113 addi sp,sp,48
|
||||
8000047c: 00008067 ret
|
||||
|
||||
80000438 <createWarps>:
|
||||
80000438: fb010113 addi sp,sp,-80
|
||||
8000043c: 04112623 sw ra,76(sp)
|
||||
80000440: 04812423 sw s0,72(sp)
|
||||
80000444: 05010413 addi s0,sp,80
|
||||
80000448: fca42623 sw a0,-52(s0)
|
||||
8000044c: fcb42423 sw a1,-56(s0)
|
||||
80000450: fcc42223 sw a2,-60(s0)
|
||||
80000454: fcd42023 sw a3,-64(s0)
|
||||
80000458: fae42e23 sw a4,-68(s0)
|
||||
8000045c: faf42c23 sw a5,-72(s0)
|
||||
80000460: 00010913 mv s2,sp
|
||||
80000464: fe042623 sw zero,-20(s0)
|
||||
80000468: 05c0006f j 800004c4 <createWarps+0x8c>
|
||||
8000046c: ffff09b7 lui s3,0xffff0
|
||||
80000470: 01310133 add sp,sp,s3
|
||||
80000474: fec42783 lw a5,-20(s0)
|
||||
80000478: fcf42823 sw a5,-48(s0)
|
||||
8000047c: fc842783 lw a5,-56(s0)
|
||||
80000480: fcf42a23 sw a5,-44(s0)
|
||||
80000484: 00010793 mv a5,sp
|
||||
80000488: fcf42c23 sw a5,-40(s0)
|
||||
8000048c: fc442783 lw a5,-60(s0)
|
||||
80000490: fcf42e23 sw a5,-36(s0)
|
||||
80000494: fc042783 lw a5,-64(s0)
|
||||
80000498: fef42023 sw a5,-32(s0)
|
||||
8000049c: fbc42783 lw a5,-68(s0)
|
||||
800004a0: fef42223 sw a5,-28(s0)
|
||||
800004a4: fb842783 lw a5,-72(s0)
|
||||
800004a8: fef42423 sw a5,-24(s0)
|
||||
800004ac: fd040793 addi a5,s0,-48
|
||||
800004b0: 00078513 mv a0,a5
|
||||
800004b4: d05ff0ef jal ra,800001b8 <queue_enqueue>
|
||||
800004b8: fec42783 lw a5,-20(s0)
|
||||
800004bc: 00178793 addi a5,a5,1
|
||||
800004c0: fef42623 sw a5,-20(s0)
|
||||
800004c4: fec42703 lw a4,-20(s0)
|
||||
800004c8: fcc42783 lw a5,-52(s0)
|
||||
800004cc: faf760e3 bltu a4,a5,8000046c <createWarps+0x34>
|
||||
800004d0: 00090113 mv sp,s2
|
||||
800004d4: e91ff0ef jal ra,80000364 <schedule_warps>
|
||||
800004d8: 06400513 li a0,100
|
||||
800004dc: f1dff0ef jal ra,800003f8 <sleep>
|
||||
800004e0: 00000013 nop
|
||||
800004e4: 04c12083 lw ra,76(sp)
|
||||
800004e8: 04812403 lw s0,72(sp)
|
||||
800004ec: 05010113 addi sp,sp,80
|
||||
800004f0: 00008067 ret
|
||||
80000480 <createWarps>:
|
||||
80000480: fb010113 addi sp,sp,-80
|
||||
80000484: 04112623 sw ra,76(sp)
|
||||
80000488: 04812423 sw s0,72(sp)
|
||||
8000048c: 05010413 addi s0,sp,80
|
||||
80000490: fca42623 sw a0,-52(s0)
|
||||
80000494: fcb42423 sw a1,-56(s0)
|
||||
80000498: fcc42223 sw a2,-60(s0)
|
||||
8000049c: fcd42023 sw a3,-64(s0)
|
||||
800004a0: fae42e23 sw a4,-68(s0)
|
||||
800004a4: faf42c23 sw a5,-72(s0)
|
||||
800004a8: 00010913 mv s2,sp
|
||||
800004ac: fe042623 sw zero,-20(s0)
|
||||
800004b0: 05c0006f j 8000050c <createWarps+0x8c>
|
||||
800004b4: ffff09b7 lui s3,0xffff0
|
||||
800004b8: 01310133 add sp,sp,s3
|
||||
800004bc: fec42783 lw a5,-20(s0)
|
||||
800004c0: fcf42823 sw a5,-48(s0)
|
||||
800004c4: fc842783 lw a5,-56(s0)
|
||||
800004c8: fcf42a23 sw a5,-44(s0)
|
||||
800004cc: 00010793 mv a5,sp
|
||||
800004d0: fcf42c23 sw a5,-40(s0)
|
||||
800004d4: fc442783 lw a5,-60(s0)
|
||||
800004d8: fcf42e23 sw a5,-36(s0)
|
||||
800004dc: fc042783 lw a5,-64(s0)
|
||||
800004e0: fef42023 sw a5,-32(s0)
|
||||
800004e4: fbc42783 lw a5,-68(s0)
|
||||
800004e8: fef42223 sw a5,-28(s0)
|
||||
800004ec: fb842783 lw a5,-72(s0)
|
||||
800004f0: fef42423 sw a5,-24(s0)
|
||||
800004f4: fd040793 addi a5,s0,-48
|
||||
800004f8: 00078513 mv a0,a5
|
||||
800004fc: d05ff0ef jal ra,80000200 <queue_enqueue>
|
||||
80000500: fec42783 lw a5,-20(s0)
|
||||
80000504: 00178793 addi a5,a5,1
|
||||
80000508: fef42623 sw a5,-20(s0)
|
||||
8000050c: fec42703 lw a4,-20(s0)
|
||||
80000510: fcc42783 lw a5,-52(s0)
|
||||
80000514: faf760e3 bltu a4,a5,800004b4 <createWarps+0x34>
|
||||
80000518: 00090113 mv sp,s2
|
||||
8000051c: e91ff0ef jal ra,800003ac <schedule_warps>
|
||||
80000520: 06400513 li a0,100
|
||||
80000524: f1dff0ef jal ra,80000440 <sleep>
|
||||
80000528: 00000013 nop
|
||||
8000052c: 04c12083 lw ra,76(sp)
|
||||
80000530: 04812403 lw s0,72(sp)
|
||||
80000534: 05010113 addi sp,sp,80
|
||||
80000538: 00008067 ret
|
||||
|
||||
800004f4 <get_1st_arg>:
|
||||
800004f4: ff010113 addi sp,sp,-16
|
||||
800004f8: 00812623 sw s0,12(sp)
|
||||
800004fc: 01712423 sw s7,8(sp)
|
||||
80000500: 01010413 addi s0,sp,16
|
||||
80000504: 000b8793 mv a5,s7
|
||||
80000508: 00078513 mv a0,a5
|
||||
8000050c: 00c12403 lw s0,12(sp)
|
||||
80000510: 00812b83 lw s7,8(sp)
|
||||
80000514: 01010113 addi sp,sp,16
|
||||
80000518: 00008067 ret
|
||||
8000053c <get_1st_arg>:
|
||||
8000053c: ff010113 addi sp,sp,-16
|
||||
80000540: 00812623 sw s0,12(sp)
|
||||
80000544: 01712423 sw s7,8(sp)
|
||||
80000548: 01010413 addi s0,sp,16
|
||||
8000054c: 000b8793 mv a5,s7
|
||||
80000550: 00078513 mv a0,a5
|
||||
80000554: 00c12403 lw s0,12(sp)
|
||||
80000558: 00812b83 lw s7,8(sp)
|
||||
8000055c: 01010113 addi sp,sp,16
|
||||
80000560: 00008067 ret
|
||||
|
||||
8000051c <get_2nd_arg>:
|
||||
8000051c: ff010113 addi sp,sp,-16
|
||||
80000520: 00812623 sw s0,12(sp)
|
||||
80000524: 01812423 sw s8,8(sp)
|
||||
80000528: 01010413 addi s0,sp,16
|
||||
8000052c: 000c0793 mv a5,s8
|
||||
80000530: 00078513 mv a0,a5
|
||||
80000534: 00c12403 lw s0,12(sp)
|
||||
80000538: 00812c03 lw s8,8(sp)
|
||||
8000053c: 01010113 addi sp,sp,16
|
||||
80000540: 00008067 ret
|
||||
80000564 <get_2nd_arg>:
|
||||
80000564: ff010113 addi sp,sp,-16
|
||||
80000568: 00812623 sw s0,12(sp)
|
||||
8000056c: 01812423 sw s8,8(sp)
|
||||
80000570: 01010413 addi s0,sp,16
|
||||
80000574: 000c0793 mv a5,s8
|
||||
80000578: 00078513 mv a0,a5
|
||||
8000057c: 00c12403 lw s0,12(sp)
|
||||
80000580: 00812c03 lw s8,8(sp)
|
||||
80000584: 01010113 addi sp,sp,16
|
||||
80000588: 00008067 ret
|
||||
|
||||
80000544 <get_3rd_arg>:
|
||||
80000544: ff010113 addi sp,sp,-16
|
||||
80000548: 00812623 sw s0,12(sp)
|
||||
8000054c: 01912423 sw s9,8(sp)
|
||||
80000550: 01010413 addi s0,sp,16
|
||||
80000554: 000c8793 mv a5,s9
|
||||
80000558: 00078513 mv a0,a5
|
||||
8000055c: 00c12403 lw s0,12(sp)
|
||||
80000560: 00812c83 lw s9,8(sp)
|
||||
80000564: 01010113 addi sp,sp,16
|
||||
80000568: 00008067 ret
|
||||
8000058c <get_3rd_arg>:
|
||||
8000058c: ff010113 addi sp,sp,-16
|
||||
80000590: 00812623 sw s0,12(sp)
|
||||
80000594: 01912423 sw s9,8(sp)
|
||||
80000598: 01010413 addi s0,sp,16
|
||||
8000059c: 000c8793 mv a5,s9
|
||||
800005a0: 00078513 mv a0,a5
|
||||
800005a4: 00c12403 lw s0,12(sp)
|
||||
800005a8: 00812c83 lw s9,8(sp)
|
||||
800005ac: 01010113 addi sp,sp,16
|
||||
800005b0: 00008067 ret
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
|
|
Binary file not shown.
|
@ -1,91 +1,96 @@
|
|||
:0200000480007A
|
||||
:1000000037F1FF7FEF00C018EF008006730000009B
|
||||
:1000000037F1FF7FEF00401DEF0080067300000016
|
||||
:10001000938B0600130C0700938C0700130F01004D
|
||||
:100020009303050013051000635C75001301018044
|
||||
:10003000130305006B500300130515006FF0DFFE7E
|
||||
:1000400013010F0013050000930F0600938D0300AA
|
||||
:10005000EBE0BF01170500001305852A6B40050082
|
||||
:10005000EBE0BF01170500001305052F6B400500FD
|
||||
:1000600017030000130303FB6B000300678000000D
|
||||
:10007000130101FF23261100232481001304010131
|
||||
:10008000B707008193870700370700811307C76010
|
||||
:10009000B70600819386C65C370600801306460DBE
|
||||
:1000A0009305100013050001EF0000391300000054
|
||||
:1000B000EF00402193070500E38C07FE9307000043
|
||||
:1000A0009305700013053000EF00803D1300000041
|
||||
:1000B000EF00C02593070500E38C07FE93070000BF
|
||||
:1000C000138507008320C10003248100130101016F
|
||||
:1000D00067800000130101FD232611022324810201
|
||||
:1000E00013040103232EA4FC232CB4FCEF00804056
|
||||
:1000F0002326A4FEEF0080422324A4FEEF008044C8
|
||||
:100100002322A4FE032784FD8327C4FDB307F70041
|
||||
:100110002320F4FE832704FE939727000327C4FEC1
|
||||
:10012000B307F70083A60700832704FE93972700F1
|
||||
:10013000032784FEB307F70003A70700832704FE05
|
||||
:1001400093972700032644FEB307F6003387E600A3
|
||||
:1001500023A0E700832784FD138717009307070078
|
||||
:1001600093971700B387E70093973700B387E700AB
|
||||
:100170009397270013850700EF0000281300000065
|
||||
:100180008320C10203248102130101036780000060
|
||||
:1001900097020001938202EB1303000093037000A7
|
||||
:1001A00023A0620023A2620023A4620023A672009F
|
||||
:1001B00023A862006780000097020001938282E812
|
||||
:1001C00003A382001303130023A4620013834201DC
|
||||
:1001D00083AE420093935E0033037300032E050049
|
||||
:1001E0002320C301032E45002322C301032E8500D3
|
||||
:1001F0002324C301032EC5002326C301032E0501BA
|
||||
:100200002328C301032E4501232AC301032E8501A0
|
||||
:10021000232CC301938E1E00130F20036394EE0161
|
||||
:10022000930E000023A2D201678000009702000114
|
||||
:10023000938242E103A382001303F3FF23A462002D
|
||||
:100240001383420183AE0200930F2003138F0E002D
|
||||
:10025000130F1F006314FF01130F000023A0E2011E
|
||||
:1002600093935E0033037300032E03002320C50124
|
||||
:10027000032E43002322C501032E83002324C5013E
|
||||
:10028000032EC3002326C501032E03012328C50125
|
||||
:10029000032E4301232AC501032E8301232CC5010C
|
||||
:1002A00067800000970200019382C2D903A38200F5
|
||||
:1002B00013050000130E200363146E0013051500D0
|
||||
:1002C00067800000970200019382C2D703A38200D7
|
||||
:1002D00013050000130E000063146E0013051500D3
|
||||
:1002E00067800000970200019382C2D503A3C20079
|
||||
:1002F00083A3020133B5630067800000130101FD91
|
||||
:10030000232611022324810213040103EFF09FFB33
|
||||
:10031000930705006384070073000000930744FD02
|
||||
:1003200013850700EFF09FF08327C4FD13810700BA
|
||||
:10033000032584FD832544FD032604FE832644FE15
|
||||
:10034000032784FE8327C4FEEFF09FCC73000000D8
|
||||
:10035000130000008320C102032481021301010362
|
||||
:1003600067800000130101FD23261102232481026E
|
||||
:1003700013040103930901006F000005B707008112
|
||||
:100380009387070483A7070113871700B707008126
|
||||
:100390009387070423A8E700930744FD138507000C
|
||||
:1003A000EFF0DFE88327C4FD13810700032584FDF8
|
||||
:1003B000832544FD032604FE832644FE032784FE92
|
||||
:1003C0008327C4FEEFF0DFC9EFF0DFEF93070500EE
|
||||
:1003D00063980700EFF01FF193070500E39007FA19
|
||||
:1003E00013810900130000008320C102032481024D
|
||||
:1003F0001301010367800000130101FD2326810220
|
||||
:1004000013040103232EA4FC232604FE6F00000125
|
||||
:100410008327C4FE938717002326F4FE0327C4FE18
|
||||
:100420008327C4FDE346F7FE130000000324C10246
|
||||
:100430001301010367800000130101FB232611044F
|
||||
:1004400023248104130401052326A4FC2324B4FCE3
|
||||
:100450002322C4FC2320D4FC232EE4FA232CF4FA18
|
||||
:1004600013090100232604FE6F00C005B709FFFF32
|
||||
:10047000330131018327C4FE2328F4FC832784FC45
|
||||
:10048000232AF4FC93070100232CF4FC832744FC6B
|
||||
:10049000232EF4FC832704FC2320F4FE8327C4FBD3
|
||||
:1004A0002322F4FE832784FB2324F4FE930704FD18
|
||||
:1004B00013850700EFF05FD08327C4FE93871700F2
|
||||
:1004C0002326F4FE0327C4FE8327C4FCE360F7FA67
|
||||
:1004D00013010900EFF01FE913054006EFF0DFF10B
|
||||
:1004E000130000008320C1040324810413010105CB
|
||||
:1004F00067800000130101FF23268100232471017E
|
||||
:100500001304010193870B00138507000324C10026
|
||||
:10051000832B81001301010167800000130101FF9B
|
||||
:1005200023268100232481011304010193070C0079
|
||||
:10053000138507000324C100032C8100130101016E
|
||||
:1005400067800000130101FF23268100232491010D
|
||||
:100550001304010193870C00138507000324C100D5
|
||||
:0C056000832C8100130101016780000062
|
||||
:1000D00067800000130101FC232E1102232C8102F2
|
||||
:1000E000130401042326A4FC2324B4FCEF000045E0
|
||||
:1000F0002326A4FEEF0000472324A4FEEF000049BE
|
||||
:100100002322A4FE032784FC93070700939737005C
|
||||
:10011000B387E7400327C4FCB307F7002320F4FEAE
|
||||
:10012000832704FE93B7A70093C71700A30FF4FC1F
|
||||
:100130008347F4FD138F0700B7070080938F47199B
|
||||
:100140006B200F007B70FF01832704FE939727002D
|
||||
:100150000327C4FEB307F70083A60700832704FE26
|
||||
:1001600093972700032784FEB307F70003A7070030
|
||||
:10017000832704FE93972700032644FEB307F60067
|
||||
:100180003387E60023A0E700B7070080138E8719A6
|
||||
:1001900067000E00130000006B300000832784FC12
|
||||
:1001A000138717009307070093971700B387E7009B
|
||||
:1001B00093973700B387E7009397270013850700CD
|
||||
:1001C000EF000028130000008320C10303248103F3
|
||||
:1001D000130101046780000097020001938282E608
|
||||
:1001E000130300009303700023A0620023A26200A7
|
||||
:1001F00023A4620023A6720023A862006780000087
|
||||
:1002000097020001938202E403A382001303130008
|
||||
:1002100023A462001383420183AE420093935E00E5
|
||||
:1002200033037300032E05002320C301032E450072
|
||||
:100230002322C301032E85002324C301032EC500FE
|
||||
:100240002326C301032E05012328C301032E4501E4
|
||||
:10025000232AC301032E8501232CC301938E1E0084
|
||||
:10026000130F20036394EE01930E000023A2D2012A
|
||||
:1002700067800000970200019382C2DC03A3820022
|
||||
:100280001303F3FF23A462001383420183AE020031
|
||||
:10029000930F2003138F0E00130F1F006314FF0131
|
||||
:1002A000130F000023A0E20193935E003303730059
|
||||
:1002B000032E03002320C501032E43002322C50182
|
||||
:1002C000032E83002324C501032EC3002326C5016A
|
||||
:1002D000032E03012328C501032E4301232AC50150
|
||||
:1002E000032E8301232CC5016780000097020001C3
|
||||
:1002F000938242D503A3820013050000130E20034E
|
||||
:1003000063146E001305150067800000970200015A
|
||||
:10031000938242D303A3820013050000130E000052
|
||||
:1003200063146E001305150067800000970200013A
|
||||
:10033000938242D103A3C20083A3020133B56300B9
|
||||
:1003400067800000130101FD23261102232481028E
|
||||
:1003500013040103EFF09FFB93070500638407007C
|
||||
:1003600073000000930744FD13850700EFF09FF032
|
||||
:100370008327C4FD13810700032584FD832544FDE5
|
||||
:10038000032604FE832644FE032784FE8327C4FE3F
|
||||
:10039000EFF01FC873000000130000008320C102AB
|
||||
:1003A000032481021301010367800000130101FD92
|
||||
:1003B000232611022324810213040103930901005F
|
||||
:1003C0006F000005B70700819387070483A7070123
|
||||
:1003D00013871700B70700819387070423A8E70056
|
||||
:1003E000930744FD13850700EFF0DFE88327C4FD82
|
||||
:1003F00013810700032584FD832544FD032604FEA5
|
||||
:10040000832644FE032784FE8327C4FEEFF05FC5E6
|
||||
:10041000EFF0DFEF9307050063980700EFF01FF19F
|
||||
:1004200093070500E39007FA138109001300000009
|
||||
:100430008320C102032481021301010367800000AD
|
||||
:10044000130101FD2326810213040103232EA4FCC2
|
||||
:10045000232604FE6F0000018327C4FE9387170044
|
||||
:100460002326F4FE0327C4FE8327C4FDE346F7FEDC
|
||||
:10047000130000000324C102130101036780000080
|
||||
:10048000130101FB23261104232481041304010515
|
||||
:100490002326A4FC2324B4FC2322C4FC2320D4FC64
|
||||
:1004A000232EE4FA232CF4FA13090100232604FE78
|
||||
:1004B0006F00C005B709FFFF330131018327C4FE78
|
||||
:1004C0002328F4FC832784FC232AF4FC93070100EF
|
||||
:1004D000232CF4FC832744FC232EF4FC832704FC08
|
||||
:1004E0002320F4FE8327C4FB2322F4FE832784FB0E
|
||||
:1004F0002324F4FE930704FD13850700EFF05FD07B
|
||||
:100500008327C4FE938717002326F4FE0327C4FE27
|
||||
:100510008327C4FCE360F7FA13010900EFF01FE939
|
||||
:1005200013054006EFF0DFF1130000008320C10443
|
||||
:10053000032481041301010567800000130101FFFA
|
||||
:1005400023268100232471011304010193870B00EA
|
||||
:10055000138507000324C100832B810013010101CF
|
||||
:1005600067800000130101FF2326810023248101FD
|
||||
:100570001304010193070C00138507000324C10035
|
||||
:10058000032C81001301010167800000130101FFAA
|
||||
:1005900023268100232491011304010193870C0079
|
||||
:1005A000138507000324C100832C8100130101017E
|
||||
:0405B0006780000060
|
||||
:02000004810079
|
||||
:1005CC000100000001000000060000000000000017
|
||||
:1005DC000300000001000000010000000200000008
|
||||
|
|
|
@ -71,19 +71,6 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
|
|||
schedule_warps();
|
||||
|
||||
sleep(100);
|
||||
|
||||
// asm __volatile__("addi t5, sp, 0");
|
||||
|
||||
// for (unsigned i = 1; i < num_Warps; i++)
|
||||
// {
|
||||
// asm __volatile__("addi sp, sp, -2048");
|
||||
// wspawn(num_threads, i, func, x_ptr, y_ptr, z_ptr);
|
||||
// }
|
||||
|
||||
// asm __volatile__("addi sp, t5, 0");
|
||||
|
||||
// createThreads(num_threads, 0, (unsigned) func, x_ptr, y_ptr, z_ptr);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -10,24 +10,23 @@
|
|||
#define ECALL asm __volatile__(".word 0x00000073");
|
||||
#define JMPRT asm __volatile__(".word 0x5406b");
|
||||
#define SPLIT asm __volatile__(".word 0xf206b");
|
||||
#define P_JUMP asm __volatile__(".word 0x1ff706b");
|
||||
#define P_JUMP asm __volatile__(".word 0x1ff707b");
|
||||
#define JOIN asm __volatile__(".word 0x306b");
|
||||
|
||||
|
||||
// #define __if(val) { \
|
||||
#define __if(val) bool temp = !val; \
|
||||
register unsigned p asm("t5") = temp; \
|
||||
register void * e asm("t6") = &&ELSE; \
|
||||
SPLIT; \
|
||||
P_JUMP; \
|
||||
|
||||
// register unsigned p asm("t5") = val; \
|
||||
// register unsigned * e asm("t6") = &&ELSE; \
|
||||
// SPLIT; \
|
||||
// P_JUMP; \
|
||||
|
||||
|
||||
// }
|
||||
#define __else register void * w asm("t3") = &&AFTER; \
|
||||
asm __volatile__("jr t3"); \
|
||||
ELSE: asm __volatile__("nop");
|
||||
|
||||
// #define __else asm __volatile__("j AFTER"); \
|
||||
// ELSE: asm __volatile__("nop");
|
||||
|
||||
// #define __end_if AFTER: JOIN;
|
||||
#define __end_if AFTER:\
|
||||
JOIN;
|
||||
|
||||
|
||||
#define FUNC void (func)(unsigned, unsigned)
|
||||
|
|
|
@ -7,14 +7,14 @@
|
|||
.type queue_initialize, @function
|
||||
.global queue_initialize
|
||||
queue_initialize:
|
||||
la t0, q # loading base address of q
|
||||
li t1, 0 # to initialize variables
|
||||
li t2, 7 # Num of available warps
|
||||
sw t1, 0 (t0) # start_i
|
||||
sw t1, 4 (t0) # end_i
|
||||
sw t1, 8 (t0) # num_j
|
||||
sw t2, 12(t0) # total_warps
|
||||
sw t1, 16(t0) # active_warps
|
||||
la t0, q # loading base address of q
|
||||
li t1, 0 # to initialize variables
|
||||
li t2, A_WARPS # Num of available warps
|
||||
sw t1, 0 (t0) # start_i
|
||||
sw t1, 4 (t0) # end_i
|
||||
sw t1, 8 (t0) # num_j
|
||||
sw t2, 12(t0) # total_warps
|
||||
sw t1, 16(t0) # active_warps
|
||||
ret
|
||||
|
||||
|
||||
|
@ -125,4 +125,3 @@ queue_availableWarps:
|
|||
lw t2, 16(t0) # t2 = active_warps
|
||||
sltu a0, t2, t1
|
||||
ret
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue