mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Working + Stable - Mutex
This commit is contained in:
parent
96a8615a5f
commit
087a39ccf4
18 changed files with 845 additions and 182676 deletions
|
@ -304,6 +304,7 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx) {
|
|||
case InstType::N_TYPE:
|
||||
break;
|
||||
case InstType::R_TYPE:
|
||||
inst.setPred((code>>shift_rs1) & reg_mask);
|
||||
inst.setDestReg((code>>shift_rd) & reg_mask);
|
||||
inst.setSrcReg((code>>shift_rs1) & reg_mask);
|
||||
inst.setSrcReg((code>>shift_rs2) & reg_mask);
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace Harp {
|
|||
// Entry in the IPDOM Stack
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(
|
||||
unsigned p, const std::vector<std::vector<Reg<bool> > >& m,
|
||||
unsigned p, const std::vector<std::vector<Reg<Word> > >& m,
|
||||
std::vector<bool> &tm, Word pc
|
||||
): pc(pc), fallThrough(false), uni(false)
|
||||
{
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#ifndef __DEBUG_H
|
||||
#define __DEBUG_H
|
||||
|
||||
#define USE_DEBUG 9
|
||||
// #define USE_DEBUG 9
|
||||
|
||||
#ifdef USE_DEBUG
|
||||
#include <iostream>
|
||||
|
|
|
@ -85,17 +85,17 @@ void Instruction::executeOn(Warp &c) {
|
|||
return;
|
||||
}
|
||||
|
||||
/* Also throw exceptions on non-masked divergent branches. */
|
||||
if (instTable[op].controlFlow) {
|
||||
Size t, count, active;
|
||||
for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
|
||||
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
|
||||
if (c.tmask[t]) ++active;
|
||||
}
|
||||
// /* Also throw exceptions on non-masked divergent branches. */
|
||||
// if (instTable[op].controlFlow) {
|
||||
// Size t, count, active;
|
||||
// for (t = 0, count = 0, active = 0; t < c.activeThreads; ++t) {
|
||||
// if ((!predicated || c.pred[t][pred]) && c.tmask[t]) ++count;
|
||||
// if (c.tmask[t]) ++active;
|
||||
// }
|
||||
|
||||
if (count != 0 && count != active)
|
||||
throw DivergentBranchException();
|
||||
}
|
||||
// if (count != 0 && count != active)
|
||||
// throw DivergentBranchException();
|
||||
// }
|
||||
|
||||
Size nextActiveThreads = c.activeThreads;
|
||||
Size wordSz = c.core->a.getWordSize();
|
||||
|
@ -106,14 +106,14 @@ void Instruction::executeOn(Warp &c) {
|
|||
// If we have a load, overwriting a register's contents, we have to make sure
|
||||
// ahead of time it will not fault. Otherwise we may perform an indirect load
|
||||
// by mistake.
|
||||
if (op == L_INST && rdest == rsrc[0]) {
|
||||
for (Size t = 0; t < c.activeThreads; t++) {
|
||||
if ((!predicated || c.pred[t][pred]) && c.tmask[t]) {
|
||||
Word memAddr = c.reg[t][rsrc[0]] + immsrc;
|
||||
c.core->mem.read(memAddr, c.supervisorMode);
|
||||
}
|
||||
}
|
||||
}
|
||||
// if (op == L_INST && rdest == rsrc[0]) {
|
||||
// for (Size t = 0; t < c.activeThreads; t++) {
|
||||
// if ((!predicated || c.pred[t][pred]) && c.tmask[t]) {
|
||||
// Word memAddr = c.reg[t][rsrc[0]] + immsrc;
|
||||
// c.core->mem.read(memAddr, c.supervisorMode);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
bool sjOnce(true), // Has not yet split or joined once.
|
||||
pcSet(false); // PC has already been set
|
||||
|
@ -134,6 +134,8 @@ void Instruction::executeOn(Warp &c) {
|
|||
Word shamt;
|
||||
Word temp;
|
||||
Word data_read;
|
||||
// Word pred;
|
||||
DomStackEntry e(pred, c.reg, c.tmask, c.pc);
|
||||
int op1, op2;
|
||||
switch (op) {
|
||||
|
||||
|
@ -331,16 +333,19 @@ void Instruction::executeOn(Warp &c) {
|
|||
++c.stores;
|
||||
memAddr = reg[rsrc[0]] + immsrc;
|
||||
// std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
|
||||
// std::cout << "FUNC3: " << func3 << "\n";
|
||||
switch (func3)
|
||||
{
|
||||
case 0:
|
||||
// std::cout << "SB\n";
|
||||
c.core->mem.write(memAddr, reg[rsrc[1]] & 0x000000FF, c.supervisorMode, 1);
|
||||
break;
|
||||
case 1:
|
||||
// std::cout << std::hex << "INST: about to write: " << reg[rsrc[1]] << " to " << memAddr << "\n";
|
||||
// std::cout << "SH\n";
|
||||
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 2);
|
||||
break;
|
||||
case 2:
|
||||
// std::cout << std::hex << "SW: about to write: " << reg[rsrc[1]] << " to " << memAddr << "\n";
|
||||
c.core->mem.write(memAddr, reg[rsrc[1]], c.supervisorMode, 4);
|
||||
break;
|
||||
default:
|
||||
|
@ -525,6 +530,26 @@ void Instruction::executeOn(Warp &c) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
// SPLIT
|
||||
c.domStack.push(c.tmask);
|
||||
c.domStack.push(e);
|
||||
|
||||
for (unsigned i = 0; i < e.tmask.size(); ++i)
|
||||
{
|
||||
c.tmask[i] = !e.tmask[i] && c.tmask[i];
|
||||
}
|
||||
|
||||
break;
|
||||
case 3:
|
||||
// JOIN
|
||||
if (!c.domStack.top().fallThrough) {
|
||||
if (!pcSet) nextPc = c.domStack.top().pc;
|
||||
pcSet = true;
|
||||
}
|
||||
c.tmask = c.domStack.top().tmask;
|
||||
c.domStack.pop();
|
||||
break;
|
||||
case 4:
|
||||
// JMPRT
|
||||
nextActiveThreads = 1;
|
||||
|
@ -534,7 +559,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
case 5:
|
||||
// CLONE
|
||||
// std::cout << "CLONE\n";
|
||||
// std::cout << "CLONING THREAD: " << reg[rsrc[0]] << "\n";
|
||||
// std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
|
||||
c.reg[reg[rsrc[0]]] = reg;
|
||||
break;
|
||||
case 6:
|
||||
|
@ -544,7 +569,15 @@ void Instruction::executeOn(Warp &c) {
|
|||
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||
pcSet = true;
|
||||
// std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
|
||||
// std::cout << "nextPC: " << rsrc[0] << " val: " << reg[rsrc[0]] << "\n";
|
||||
// std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
|
||||
break;
|
||||
case 7:
|
||||
// pred jump reg
|
||||
if (reg[rsrc[0]])
|
||||
{
|
||||
nextPc = reg[rsrc[1]];
|
||||
pcSet = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
|
||||
|
|
181672
src/results.txt
181672
src/results.txt
File diff suppressed because it is too large
Load diff
|
@ -9,40 +9,56 @@ void matAddition (unsigned, unsigned);
|
|||
#include "./lib/lib.h"
|
||||
|
||||
|
||||
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7};
|
||||
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2};
|
||||
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
// unsigned x[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
|
||||
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
// unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
|
||||
// unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
|
||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
|
||||
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
|
||||
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
#define NUM_WARPS 8
|
||||
#define NUM_THREADS 2
|
||||
// unsigned x[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
|
||||
// unsigned y[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
#define NUM_WARPS 16
|
||||
#define NUM_THREADS 1
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
initiate_stack();
|
||||
queue_initialize();
|
||||
|
||||
createWarps(NUM_WARPS, NUM_THREADS, matAddition, x, y, z);
|
||||
|
||||
while(!queue_isEmpty()) {}
|
||||
|
||||
ECALL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void matAddition(unsigned tid, unsigned wid)
|
||||
{
|
||||
|
||||
unsigned * x_ptr = get_1st_arg();
|
||||
unsigned * y_ptr = get_2nd_arg();
|
||||
unsigned * z_ptr = get_3rd_arg();
|
||||
|
||||
unsigned i = (wid * NUM_THREADS) + tid;
|
||||
|
||||
// int cond = i < 16;
|
||||
// __if(cond)
|
||||
|
||||
// // DO SOMETHING
|
||||
|
||||
// __else
|
||||
|
||||
// // DO SOMETHING ELSE
|
||||
|
||||
// __end_if
|
||||
|
||||
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
||||
|
||||
sleep((100 * wid)+100);
|
||||
|
||||
return;
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -1,149 +1,99 @@
|
|||
:0200000480007A
|
||||
:10000000130101FF232611002324810013040101A1
|
||||
:10001000EF004046EF000048B707008193870700D4
|
||||
:10002000370700811307C719B70600819386461664
|
||||
:10003000370600801306060793052000130580008D
|
||||
:10004000EF00C02D13000000EF00407E9307050075
|
||||
:10005000E38C07FE73000000930700001385070080
|
||||
:100060008320C10003248100130101016780000087
|
||||
:10007000130101FD2326110223248102130401032D
|
||||
:10008000232EA4FC232CB4FCEF0040372326A4FE2F
|
||||
:10009000EF0040392324A4FEEF00403B2322A4FEBE
|
||||
:1000A000832784FD939717000327C4FDB307F70048
|
||||
:1000B0002320F4FE832704FE939727000327C4FE22
|
||||
:1000C000B307F70083A60700832704FE9397270052
|
||||
:1000D000032784FEB307F70003A70700832704FE66
|
||||
:1000E00093972700032644FEB307F6003387E60004
|
||||
:1000F00023A0E700130000008320C1020324810233
|
||||
:100100001301010367800000130101FC232E81020B
|
||||
:10011000232CA10313040104232EA4FC232CB4FCE0
|
||||
:10012000232AC4FC2328D4FC2326E4FC2324F4FC47
|
||||
:10013000930F05009300060093850500938B06003E
|
||||
:10014000130C0700938C0300130F010013820F00A0
|
||||
:10015000930710002326F4FE6F0000020325C4FE5F
|
||||
:100160000323C4FE130101806B5003008327C4FEE8
|
||||
:10017000938717002326F4FE13870F008327C4FEFE
|
||||
:10018000E3EEE7FC13010F0013050000938F00005E
|
||||
:10019000930D0200EBE0BF01B70700809387472073
|
||||
:1001A000138507006B400500130000000324C10302
|
||||
:1001B000032D81031301010467800000130101FD79
|
||||
:1001C00023268102130401032326A4FE2324B4FE64
|
||||
:1001D0002322C4FE2320D4FE232EE4FC232CF4FC93
|
||||
:1001E00093830700B7070080938787101383070066
|
||||
:1001F0006B000300130000000324C102130101037C
|
||||
:1002000067800000130101FD2326110223248102CF
|
||||
:1002100013040103EF008061930705006384070066
|
||||
:1002200073000000930744FD13850700EF00004BA7
|
||||
:100230008327C4FD13810700032584FD832544FD26
|
||||
:10024000032604FE832644FE032784FE8327C4FE80
|
||||
:10025000EFF09FEB130000008320C1020324810212
|
||||
:100260001301010367800000130101FD2326110221
|
||||
:100270002324810213040103130F01006F004003C4
|
||||
:10028000930744FD13850700EF0040458327C4FD15
|
||||
:1002900013810700032584FD832544FD032604FE06
|
||||
:1002A000832644FE032784FE8327C4FEEFF01FF15C
|
||||
:1002B000EF00C0579307050063980700EF00005A4E
|
||||
:1002C00093070500E39E07FA13010F0013000000D7
|
||||
:1002D0008320C1020324810213010103678000000F
|
||||
:1002E000130101FE232E810013040102232604FEC4
|
||||
:1002F0006F0000018327C4FE938717002326F4FEB6
|
||||
:100300000327C4FE93073006E3D6E7FE1300000080
|
||||
:100310000324C1011301010267800000130101FBE6
|
||||
:100320002326110423248104130401052326A4FC9D
|
||||
:100330002324B4FC2322C4FC2320D4FC232EE4FA7F
|
||||
:10034000232CF4FA130F0100232604FE6F00C005CE
|
||||
:10035000B70FFFFF3301F1018327C4FE2328F4FC0C
|
||||
:10036000832784FC232AF4FC93070100232CF4FC4C
|
||||
:10037000832744FC232EF4FC832704FC2320F4FE73
|
||||
:100380008327C4FB2322F4FE832784FB2324F4FE6B
|
||||
:10039000930704FD13850700EF0080158327C4FE33
|
||||
:1003A000938717002326F4FE0327C4FE8327C4FC8B
|
||||
:1003B000E360F7FA13010F00EFF01FEBEFF05FF2CD
|
||||
:1003C000130000008320C1040324810413010105EC
|
||||
:1003D00067800000130101FF23268100232471019F
|
||||
:1003E0001304010193870B00138507000324C10048
|
||||
:1003F000832B81001301010167800000130101FFBD
|
||||
:1004000023268100232471011304010193870B002B
|
||||
:10041000138507000324C100832B81001301010110
|
||||
:1004200067800000130101FF23268100232481013E
|
||||
:100430001304010193070C00138507000324C10076
|
||||
:10044000032C81001301010167800000130101FFEB
|
||||
:1004500023268100232491011304010193870C00BA
|
||||
:10046000138507000324C100832C810013010101BF
|
||||
:1004700067800000130101FF23268100130401019E
|
||||
:1004800037F1FF7F130000000324C10013010101B5
|
||||
:1004900067800000130101FF23268100130401017E
|
||||
:1004A000B70700819387870323AC0710B707008144
|
||||
:1004B0009387870323AE0710B707008193878703CD
|
||||
:1004C00023A00712B70700819387870313077000E3
|
||||
:1004D00023A2E712B70700819387870323A407129B
|
||||
:1004E000130000000324C100130101016780000014
|
||||
:1004F000130101FE232E8100130401022326A4FE12
|
||||
:10050000B70700819387870383A707121387170014
|
||||
:10051000B70700819387870323A0E712B7070081FD
|
||||
:100520009387870303A7C7118327C4FE83A6070009
|
||||
:10053000370600819307070093973700B387E7409A
|
||||
:100540009397270013078603B387E70023A0D700FC
|
||||
:10055000B70700819387870303A7C7118327C4FECA
|
||||
:1005600083A64700B7070081138687039307070018
|
||||
:1005700093973700B387E74093972700B307F600B8
|
||||
:1005800023A2D700B70700819387870303A7C7116A
|
||||
:100590008327C4FE83A68700B707008113868703DD
|
||||
:1005A0009307070093973700B387E7409397270097
|
||||
:1005B000B307F60023A4D700B7070081938787030A
|
||||
:1005C00003A7C7118327C4FE83A6C700B70700810E
|
||||
:1005D000138687039307070093973700B387E74095
|
||||
:1005E00093972700B307F60023A6D700B70700812B
|
||||
:1005F0009387870303A7C7118327C4FE83A6070138
|
||||
:10060000370600819307070093973700B387E740C9
|
||||
:100610009397270013078603B387E70023A8D70023
|
||||
:10062000B70700819387870303A7C7118327C4FEF9
|
||||
:1006300083A64701B7070081138687039307070046
|
||||
:1006400093973700B387E74093972700B307F600E7
|
||||
:1006500023AAD700B70700819387870303A7C71191
|
||||
:100660008327C4FE83A68701B7070081138687030B
|
||||
:100670009307070093973700B387E74093972700C6
|
||||
:10068000B307F60023ACD700B70700819387870331
|
||||
:1006900083A7C711138717009307900063E2E7024F
|
||||
:1006A000B70700819387870383A7C71113871700B4
|
||||
:1006B000B70700819387870323AEE7106F0000011F
|
||||
:1006C000B70700819387870323AE0710130000004C
|
||||
:1006D0000324C1011301010267800000130101FD21
|
||||
:1006E0002326810213040103232EA4FCB7070081F3
|
||||
:1006F0009387870383A707121387F7FFB707008144
|
||||
:100700009387870323A0E712B707008193878703A6
|
||||
:1007100003A787119307070093973700B387E74034
|
||||
:10072000939727003707008113078703B387E700F4
|
||||
:100730002326F4FEB70700819387870383A78711D9
|
||||
:10074000138717009307900063E2E702B707008161
|
||||
:100750009387870383A7871113871700B707008143
|
||||
:100760009387870323ACE7106F000001B707008170
|
||||
:100770009387870323AC07108327C4FE03A70700D2
|
||||
:100780008327C4FD23A0E7008327C4FE03A74700F7
|
||||
:100790008327C4FD23A2E7008327C4FE03A78700A5
|
||||
:1007A0008327C4FD23A4E7008327C4FE03A7C70053
|
||||
:1007B0008327C4FD23A6E7008327C4FE03A7070100
|
||||
:1007C0008327C4FD23A8E7008327C4FE03A74701AE
|
||||
:1007D0008327C4FD23AAE7008327C4FE03A787015C
|
||||
:1007E0008327C4FD23ACE700130000000324C102EB
|
||||
:1007F0001301010367800000130101FF232681001C
|
||||
:1008000013040101B70700819387870383A70712A9
|
||||
:10081000938767FF93B7170093F7F70F13850700C8
|
||||
:100820000324C1001301010167800000130101FFCF
|
||||
:100830002326810013040101B707008193878703F2
|
||||
:1008400083A7071293B7170093F7F70F13850700D5
|
||||
:100850000324C1001301010167800000130101FF9F
|
||||
:100860002326810013040101B707008193878703C2
|
||||
:1008700003A78712B70700819387870383A74712CF
|
||||
:10088000B337F70093F7F70F138507000324C10070
|
||||
:08089000130101016780000063
|
||||
:1000000037F1FF7FEF00C018EF008006730000009B
|
||||
:10001000938B0600130C0700938C0700130F01004D
|
||||
:100020009303050013051000635C75001301018044
|
||||
:10003000130305006B500300130515006FF0DFFE7E
|
||||
:1000400013010F0013050000930F0600938D0300AA
|
||||
:10005000EBE0BF01170500001305852A6B40050082
|
||||
:1000600017030000130303FB6B000300678000000D
|
||||
:10007000130101FF23261100232481001304010131
|
||||
:10008000B707008193870700370700811307C76010
|
||||
:10009000B70600819386C65C370600801306460DBE
|
||||
:1000A0009305100013050001EF0000391300000054
|
||||
:1000B000EF00402193070500E38C07FE9307000043
|
||||
:1000C000138507008320C10003248100130101016F
|
||||
:1000D00067800000130101FD232611022324810201
|
||||
:1000E00013040103232EA4FC232CB4FCEF00804056
|
||||
:1000F0002326A4FEEF0080422324A4FEEF008044C8
|
||||
:100100002322A4FE032784FD8327C4FDB307F70041
|
||||
:100110002320F4FE832704FE939727000327C4FEC1
|
||||
:10012000B307F70083A60700832704FE93972700F1
|
||||
:10013000032784FEB307F70003A70700832704FE05
|
||||
:1001400093972700032644FEB307F6003387E600A3
|
||||
:1001500023A0E700832784FD138717009307070078
|
||||
:1001600093971700B387E70093973700B387E700AB
|
||||
:100170009397270013850700EF0000281300000065
|
||||
:100180008320C10203248102130101036780000060
|
||||
:1001900097020001938202EB1303000093037000A7
|
||||
:1001A00023A0620023A2620023A4620023A672009F
|
||||
:1001B00023A862006780000097020001938282E812
|
||||
:1001C00003A382001303130023A4620013834201DC
|
||||
:1001D00083AE420093935E0033037300032E050049
|
||||
:1001E0002320C301032E45002322C301032E8500D3
|
||||
:1001F0002324C301032EC5002326C301032E0501BA
|
||||
:100200002328C301032E4501232AC301032E8501A0
|
||||
:10021000232CC301938E1E00130F20036394EE0161
|
||||
:10022000930E000023A2D201678000009702000114
|
||||
:10023000938242E103A382001303F3FF23A462002D
|
||||
:100240001383420183AE0200930F2003138F0E002D
|
||||
:10025000130F1F006314FF01130F000023A0E2011E
|
||||
:1002600093935E0033037300032E03002320C50124
|
||||
:10027000032E43002322C501032E83002324C5013E
|
||||
:10028000032EC3002326C501032E03012328C50125
|
||||
:10029000032E4301232AC501032E8301232CC5010C
|
||||
:1002A00067800000970200019382C2D903A38200F5
|
||||
:1002B00013050000130E200363146E0013051500D0
|
||||
:1002C00067800000970200019382C2D703A38200D7
|
||||
:1002D00013050000130E000063146E0013051500D3
|
||||
:1002E00067800000970200019382C2D503A3C20079
|
||||
:1002F00083A3020133B5630067800000130101FD91
|
||||
:10030000232611022324810213040103EFF09FFB33
|
||||
:10031000930705006384070073000000930744FD02
|
||||
:1003200013850700EFF09FF08327C4FD13810700BA
|
||||
:10033000032584FD832544FD032604FE832644FE15
|
||||
:10034000032784FE8327C4FEEFF09FCC73000000D8
|
||||
:10035000130000008320C102032481021301010362
|
||||
:1003600067800000130101FD23261102232481026E
|
||||
:1003700013040103930901006F000005B707008112
|
||||
:100380009387070483A7070113871700B707008126
|
||||
:100390009387070423A8E700930744FD138507000C
|
||||
:1003A000EFF0DFE88327C4FD13810700032584FDF8
|
||||
:1003B000832544FD032604FE832644FE032784FE92
|
||||
:1003C0008327C4FEEFF0DFC9EFF0DFEF93070500EE
|
||||
:1003D00063980700EFF01FF193070500E39007FA19
|
||||
:1003E00013810900130000008320C102032481024D
|
||||
:1003F0001301010367800000130101FD2326810220
|
||||
:1004000013040103232EA4FC232604FE6F00000125
|
||||
:100410008327C4FE938717002326F4FE0327C4FE18
|
||||
:100420008327C4FDE346F7FE130000000324C10246
|
||||
:100430001301010367800000130101FB232611044F
|
||||
:1004400023248104130401052326A4FC2324B4FCE3
|
||||
:100450002322C4FC2320D4FC232EE4FA232CF4FA18
|
||||
:1004600013090100232604FE6F00C005B709FFFF32
|
||||
:10047000330131018327C4FE2328F4FC832784FC45
|
||||
:10048000232AF4FC93070100232CF4FC832744FC6B
|
||||
:10049000232EF4FC832704FC2320F4FE8327C4FBD3
|
||||
:1004A0002322F4FE832784FB2324F4FE930704FD18
|
||||
:1004B00013850700EFF05FD08327C4FE93871700F2
|
||||
:1004C0002326F4FE0327C4FE8327C4FCE360F7FA67
|
||||
:1004D00013010900EFF01FE913054006EFF0DFF10B
|
||||
:1004E000130000008320C1040324810413010105CB
|
||||
:1004F00067800000130101FF23268100232471017E
|
||||
:100500001304010193870B00138507000324C10026
|
||||
:10051000832B81001301010167800000130101FF9B
|
||||
:1005200023268100232481011304010193070C0079
|
||||
:10053000138507000324C100032C8100130101016E
|
||||
:1005400067800000130101FF23268100232491010D
|
||||
:100550001304010193870C00138507000324C100D5
|
||||
:0C056000832C8100130101016780000062
|
||||
:02000004810079
|
||||
:100164000100000001000000060000000000000083
|
||||
:100174000300000001000000010000000200000074
|
||||
:10018400000000000300000006000000070000005B
|
||||
:10019400050000000700000000000000020000004D
|
||||
:1001A4000200000000000000050000000000000044
|
||||
:1001B4000100000001000000040000000200000033
|
||||
:1001C4000000000000000000030000000200000026
|
||||
:1005CC000100000001000000060000000000000017
|
||||
:1005DC000300000001000000010000000200000008
|
||||
:1005EC0000000000030000000600000007000000EF
|
||||
:1005FC0005000000070000000700000009000000D3
|
||||
:10060C0000000000020000000200000000000000DA
|
||||
:10061C0005000000000000000100000001000000C7
|
||||
:10062C0004000000020000000000000000000000B8
|
||||
:10063C0003000000020000000300000002000000A4
|
||||
:040000058000000077
|
||||
:00000001FF
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
/opt/riscv/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c ./lib/queue.c -o gpgpu_test.elf
|
||||
/opt/riscv/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib ./lib/lib.s gpgpu_test.c ./lib/queue.s ./lib/lib.c -o gpgpu_test.elf
|
||||
/opt/riscv/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump
|
||||
/opt/riscv/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex
|
Binary file not shown.
|
@ -1,111 +1,9 @@
|
|||
#include "lib.h"
|
||||
|
||||
void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
|
||||
{
|
||||
|
||||
asm __volatile__("mv t6, a0");
|
||||
asm __volatile__("mv ra, a2");
|
||||
asm __volatile__("mv a1, a1");
|
||||
extern void createThreads(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *);
|
||||
extern void wspawn(unsigned, unsigned, unsigned, unsigned *, unsigned *, unsigned *);
|
||||
|
||||
asm __volatile__("mv s7, a3");
|
||||
asm __volatile__("mv s8, a4");
|
||||
asm __volatile__("mv s9, t2");
|
||||
|
||||
|
||||
asm __volatile__("addi t5, sp, 0");
|
||||
|
||||
register unsigned num_threads_ asm("t6");
|
||||
asm __volatile__("mv tp, t6");
|
||||
for (unsigned i = 1; i < num_threads_; i++)
|
||||
{
|
||||
|
||||
register unsigned cur_tid asm("a0") = i;
|
||||
register unsigned not_sure asm("t1") = i;
|
||||
asm __volatile__("addi sp, sp, -2048");
|
||||
CLONE;
|
||||
}
|
||||
asm __volatile__("addi sp, t5, 0");
|
||||
|
||||
|
||||
register unsigned cur_tid asm("a0") = 0;
|
||||
|
||||
|
||||
// jalis TO FUNC
|
||||
// register unsigned num_lanes asm("t6") = func_addr;
|
||||
// register unsigned link asm("s11") = num_threads;
|
||||
asm __volatile__("mv t6, ra");
|
||||
asm __volatile__("mv s11, tp");
|
||||
|
||||
|
||||
JALRS;
|
||||
|
||||
register unsigned jump_dest asm("a0") = (unsigned) reschedule_warps;
|
||||
JMPRT;
|
||||
|
||||
|
||||
|
||||
// // register unsigned *xx asm("s7") = x_ptr;
|
||||
// // register unsigned *yy asm("s8") = y_ptr;
|
||||
// // register unsigned *zz asm("s9") = z_ptr;
|
||||
// register unsigned wid_ asm("a1") = wid;
|
||||
|
||||
|
||||
// asm __volatile__("addi t5, sp, 0");
|
||||
// for (unsigned i = 1; i < num_threads; i++)
|
||||
// {
|
||||
|
||||
// register unsigned cur_tid asm("a0") = i;
|
||||
// register unsigned not_sure asm("t1") = i;
|
||||
// asm __volatile__("addi sp, sp, -256");
|
||||
// CLONE;
|
||||
// }
|
||||
// asm __volatile__("addi sp, t5, 0");
|
||||
|
||||
|
||||
// register unsigned cur_tid asm("a0") = 0;
|
||||
|
||||
|
||||
// // jalis TO FUNC
|
||||
// register unsigned num_lanes asm("t6") = func_addr;
|
||||
// register unsigned link asm("s11") = num_threads;
|
||||
|
||||
|
||||
// JALRS;
|
||||
|
||||
// register unsigned jump_dest asm("a0") = (unsigned) reschedule_warps;
|
||||
// JMPRT;
|
||||
|
||||
}
|
||||
|
||||
void wspawn(unsigned num_threads, unsigned wid, unsigned func, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
|
||||
{
|
||||
|
||||
asm __volatile__("mv t2, a5");
|
||||
// asm __volatile__("mv t1, a5");
|
||||
|
||||
register unsigned func_add asm("t1") = (unsigned) &createThreads;
|
||||
|
||||
|
||||
|
||||
WSPAWN; // THIS SHOULD COPY THE CSR REGISTERS TO THE NEW WARP
|
||||
|
||||
|
||||
|
||||
// register unsigned *tzz asm("t2") = z_ptr;
|
||||
|
||||
// register unsigned func_add asm("t1") = (unsigned) &createThreads;
|
||||
|
||||
// register unsigned n_threads asm("a0") = num_threads;
|
||||
// register unsigned wwid asm("a1") = wid;
|
||||
// register unsigned ffunc asm("a2") = func;
|
||||
|
||||
// register unsigned *xx asm("a3") = x_ptr;
|
||||
// register unsigned *yy asm("a4") = y_ptr;
|
||||
// register unsigned *zz asm("a5") = tzz;
|
||||
|
||||
// WSPAWN; // THIS SHOULD COPY THE CSR REGISTERS TO THE NEW WARP
|
||||
|
||||
}
|
||||
|
||||
void reschedule_warps()
|
||||
{
|
||||
|
@ -120,25 +18,28 @@ void reschedule_warps()
|
|||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
createThreads(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
|
||||
|
||||
ECALL;
|
||||
|
||||
}
|
||||
|
||||
void schedule_warps()
|
||||
{
|
||||
asm __volatile__("mv t5, sp");
|
||||
asm __volatile__("mv s3, sp");
|
||||
while (!queue_isEmpty() && queue_availableWarps())
|
||||
{
|
||||
++q.active_warps;
|
||||
Job j;
|
||||
queue_dequeue(&j);
|
||||
|
||||
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
|
||||
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
|
||||
}
|
||||
asm __volatile__("mv sp, t5");
|
||||
asm __volatile__("mv sp, s3");
|
||||
}
|
||||
|
||||
void sleep()
|
||||
void sleep(int t)
|
||||
{
|
||||
for(int z = 0; z < 100; z++) {}
|
||||
for(int z = 0; z < t; z++) {}
|
||||
}
|
||||
|
||||
|
||||
|
@ -146,11 +47,11 @@ void sleep()
|
|||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
|
||||
{
|
||||
|
||||
asm __volatile__("addi t5, sp, 0");
|
||||
asm __volatile__("addi s2, sp, 0");
|
||||
for (unsigned i = 0; i < num_Warps; i++)
|
||||
{
|
||||
asm __volatile__("lui t6, 0xFFFF0");
|
||||
asm __volatile__("add sp, sp, t6");
|
||||
asm __volatile__("lui s3, 0xFFFF0");
|
||||
asm __volatile__("add sp, sp, s3");
|
||||
register unsigned stack_ptr asm("sp");
|
||||
|
||||
Job j;
|
||||
|
@ -164,12 +65,12 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
|
|||
|
||||
queue_enqueue(&j);
|
||||
}
|
||||
asm __volatile__("addi sp, t5, 0");
|
||||
asm __volatile__("addi sp, s2, 0");
|
||||
|
||||
|
||||
schedule_warps();
|
||||
|
||||
sleep();
|
||||
sleep(100);
|
||||
|
||||
// asm __volatile__("addi t5, sp, 0");
|
||||
|
||||
|
@ -186,11 +87,11 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
|
|||
}
|
||||
|
||||
|
||||
unsigned get_wid()
|
||||
{
|
||||
register unsigned ret asm("s7");
|
||||
return ret;
|
||||
}
|
||||
// unsigned get_wid()
|
||||
// {
|
||||
// register unsigned ret asm("s7");
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
unsigned * get_1st_arg(void)
|
||||
{
|
||||
|
@ -208,7 +109,3 @@ unsigned * get_3rd_arg(void)
|
|||
return ret;
|
||||
}
|
||||
|
||||
void initiate_stack()
|
||||
{
|
||||
asm __volatile__("lui sp,0x7ffff":::);
|
||||
}
|
||||
|
|
|
@ -6,21 +6,38 @@
|
|||
|
||||
#define WSPAWN asm __volatile__(".word 0x3006b"::);
|
||||
#define CLONE asm __volatile__(".word 0x3506b":::);
|
||||
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"s10")
|
||||
#define ECALL asm __volatile__(".word 0x00000073")
|
||||
#define JMPRT asm __volatile__(".word 0x5406b")
|
||||
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"s10");
|
||||
#define ECALL asm __volatile__(".word 0x00000073");
|
||||
#define JMPRT asm __volatile__(".word 0x5406b");
|
||||
#define SPLIT asm __volatile__(".word 0xf206b");
|
||||
#define P_JUMP asm __volatile__(".word 0x1ff706b");
|
||||
#define JOIN asm __volatile__(".word 0x306b");
|
||||
|
||||
|
||||
// #define __if(val) { \
|
||||
|
||||
// register unsigned p asm("t5") = val; \
|
||||
// register unsigned * e asm("t6") = &&ELSE; \
|
||||
// SPLIT; \
|
||||
// P_JUMP; \
|
||||
|
||||
|
||||
// }
|
||||
|
||||
// #define __else asm __volatile__("j AFTER"); \
|
||||
// ELSE: asm __volatile__("nop");
|
||||
|
||||
// #define __end_if AFTER: JOIN;
|
||||
|
||||
|
||||
#define FUNC void (func)(unsigned, unsigned)
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *);
|
||||
void reschedule_warps(void);
|
||||
|
||||
unsigned get_wid();
|
||||
unsigned * get_1st_arg(void);
|
||||
unsigned * get_2nd_arg(void);
|
||||
unsigned * get_3rd_arg(void);
|
||||
void initiate_stack();
|
||||
|
||||
void sleep(int);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
49
src/riscv_gpgpu/lib/lib.s
Normal file
49
src/riscv_gpgpu/lib/lib.s
Normal file
|
@ -0,0 +1,49 @@
|
|||
|
||||
|
||||
|
||||
.section .text
|
||||
|
||||
.type _start, @function
|
||||
.global _start
|
||||
_start:
|
||||
lui sp, 0x7ffff
|
||||
jal queue_initialize
|
||||
jal main
|
||||
ecall
|
||||
|
||||
.type createThreads, @function
|
||||
.global createThreads
|
||||
createThreads:
|
||||
mv s7,a3 # Moving x_ptr to s7
|
||||
mv s8,a4 # Moving y_ptr to s8
|
||||
mv s9,a5 # Moving z_ptr to s9
|
||||
mv t5,sp # Saving the current stack pointer to t5
|
||||
mv t2, a0 # t2 = num_threads
|
||||
loop_init:
|
||||
li a0,1 # i = 1 (tid 0 is the main thread, set after the loop)
|
||||
loop_cond:
|
||||
bge a0, t2, loop_done # i < num_threads
|
||||
loop_body:
|
||||
addi sp,sp,-2048 # Allocate 2k stack for new thread
|
||||
mv t1, a0 # #lane = i
|
||||
.word 0x3506b # clone register state
|
||||
loop_inc:
|
||||
addi a0, a0, 1
|
||||
j loop_cond
|
||||
loop_done:
|
||||
mv sp,t5 # Restoring the stack
|
||||
li a0,0 # setting tid = 0 for main thread
|
||||
mv t6,a2 # setting func_addr
|
||||
mv s11,t2 # setting num_threads to spawn
|
||||
.word 0x1bfe0eb
|
||||
la a0, reschedule_warps
|
||||
.word 0x5406b
|
||||
|
||||
|
||||
|
||||
.type wspawn, @function
|
||||
.global wspawn
|
||||
wspawn:
|
||||
la t1, createThreads
|
||||
.word 0x3006b # WSPAWN instruction
|
||||
ret
|
|
@ -1,73 +0,0 @@
|
|||
|
||||
#include "queue.h"
|
||||
|
||||
void queue_initialize(void)
|
||||
{
|
||||
q.start_i = 0;
|
||||
q.end_i = 0;
|
||||
q.num_j = 0;
|
||||
q.total_warps = 7;
|
||||
q.active_warps = 0;
|
||||
}
|
||||
|
||||
void queue_enqueue(Job * j)
|
||||
{
|
||||
q.num_j++;
|
||||
|
||||
// q.jobs[q.end_i] = j;
|
||||
|
||||
q.jobs[q.end_i].wid = j->wid;
|
||||
q.jobs[q.end_i].n_threads = j->n_threads;
|
||||
q.jobs[q.end_i].base_sp = j->base_sp;
|
||||
q.jobs[q.end_i].func_ptr = j->func_ptr;
|
||||
q.jobs[q.end_i].x = j->x;
|
||||
q.jobs[q.end_i].y = j->y;
|
||||
q.jobs[q.end_i].z = j->z;
|
||||
if ((q.end_i + 1) < SIZE)
|
||||
{
|
||||
q.end_i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
q.end_i = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void queue_dequeue(Job * r)
|
||||
{
|
||||
q.num_j--;
|
||||
Job * j = &(q.jobs[q.start_i]);
|
||||
if ((q.start_i + 1) < SIZE)
|
||||
{
|
||||
q.start_i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
q.start_i = 0;
|
||||
}
|
||||
|
||||
r->wid = j->wid;
|
||||
r->n_threads = j->n_threads;
|
||||
r->base_sp = j->base_sp;
|
||||
r->func_ptr = j->func_ptr;
|
||||
r->x = j->x;
|
||||
r->y = j->y;
|
||||
r->z = j->z;
|
||||
|
||||
}
|
||||
|
||||
int queue_isFull(void)
|
||||
{
|
||||
return (q.num_j == SIZE);
|
||||
}
|
||||
|
||||
int queue_isEmpty(void)
|
||||
{
|
||||
return (q.num_j == 0);
|
||||
}
|
||||
|
||||
int queue_availableWarps()
|
||||
{
|
||||
return (q.active_warps < q.total_warps);
|
||||
}
|
|
@ -5,7 +5,8 @@
|
|||
|
||||
|
||||
|
||||
#define SIZE 10
|
||||
#define SIZE 50
|
||||
#define WARPS 7
|
||||
|
||||
|
||||
typedef struct Job_t
|
||||
|
@ -22,13 +23,12 @@ typedef struct Job_t
|
|||
|
||||
typedef struct Queue_t
|
||||
{
|
||||
|
||||
struct Job_t jobs[SIZE];
|
||||
unsigned start_i;
|
||||
unsigned end_i;
|
||||
unsigned num_j;
|
||||
unsigned total_warps;
|
||||
unsigned active_warps;
|
||||
struct Job_t jobs[SIZE];
|
||||
|
||||
} Queue;
|
||||
|
||||
|
|
128
src/riscv_gpgpu/lib/queue.s
Normal file
128
src/riscv_gpgpu/lib/queue.s
Normal file
|
@ -0,0 +1,128 @@
|
|||
|
||||
.equ A_WARPS, 7
|
||||
.equ SIZE, 50
|
||||
|
||||
.section .text
|
||||
|
||||
.type queue_initialize, @function
|
||||
.global queue_initialize
|
||||
queue_initialize:
|
||||
la t0, q # loading base address of q
|
||||
li t1, 0 # to initialize variables
|
||||
li t2, 7 # Num of available warps
|
||||
sw t1, 0 (t0) # start_i
|
||||
sw t1, 4 (t0) # end_i
|
||||
sw t1, 8 (t0) # num_j
|
||||
sw t2, 12(t0) # total_warps
|
||||
sw t1, 16(t0) # active_warps
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
.type queue_enqueue, @function
|
||||
.global queue_enqueue
|
||||
queue_enqueue:
|
||||
la t0, q # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
addi t1, t1, 1 # ++t1
|
||||
sw t1, 8 (t0) # num_j = t1
|
||||
addi t1, t0, 20 # t1 = jobs_addr
|
||||
lw t4, 4 (t0) # t4 = end_i
|
||||
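slli t2, t4, 5 # byte offset = index * 32 (shift by 5); NOTE(review): only 7 words (28 bytes) are copied per job below - confirm sizeof(Job) in queue.h is 32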
|
||||
add t1, t1, t2 # jobs + index
|
||||
lw t3, 0 (a0) # wid
|
||||
sw t3, 0 (t1) #
|
||||
lw t3, 4 (a0) # n_threads
|
||||
sw t3, 4 (t1) #
|
||||
lw t3, 8 (a0) # base_sp
|
||||
sw t3, 8 (t1) #
|
||||
lw t3, 12(a0) # func_ptr
|
||||
sw t3, 12(t1) #
|
||||
lw t3, 16(a0) # x
|
||||
sw t3, 16(t1) #
|
||||
lw t3, 20(a0) # y
|
||||
sw t3, 20(t1) #
|
||||
lw t3, 24(a0) # z
|
||||
sw t3, 24(t1) #
|
||||
addi t4, t4, 1 # end_i++
|
||||
li t5, SIZE # size
|
||||
bne t4, t5, ec # if ((q.end_i + 1) == SIZE)
|
||||
mv t4, zero
|
||||
ec:
|
||||
sw t4, 4 (t0) # end_i
|
||||
ret
|
||||
|
||||
|
||||
.type queue_dequeue, @function
|
||||
.global queue_dequeue
|
||||
|
||||
queue_dequeue:
|
||||
la t0, q # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
addi t1, t1, -1 # --t1
|
||||
sw t1, 8 (t0) # num_j = t1
|
||||
addi t1, t0, 20 # t1 = jobs_addr
|
||||
lw t4, 0 (t0) # t4 = start_i
|
||||
li t6, SIZE # size
|
||||
mv t5, t4 # t5 = start_i
|
||||
addi t5, t5, 1 # t5++
|
||||
bne t5, t6, dc # if ((q.start_i + 1) == SIZE)
|
||||
mv t5, zero
|
||||
dc:
|
||||
sw t5, 0(t0) # storing start_i
|
||||
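slli t2, t4, 5 # byte offset = index * 32 (shift by 5); NOTE(review): only 7 words (28 bytes) are copied per job below - confirm sizeof(Job) in queue.h is 32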
|
||||
add t1, t1, t2 # jobs + index
|
||||
lw t3, 0 (t1) # wid
|
||||
sw t3, 0 (a0) #
|
||||
lw t3, 4 (t1) # n_threads
|
||||
sw t3, 4 (a0) #
|
||||
lw t3, 8 (t1) # base_sp
|
||||
sw t3, 8 (a0) #
|
||||
lw t3, 12(t1) # func_ptr
|
||||
sw t3, 12(a0) #
|
||||
lw t3, 16(t1) # x
|
||||
sw t3, 16(a0) #
|
||||
lw t3, 20(t1) # y
|
||||
sw t3, 20(a0) #
|
||||
lw t3, 24(t1) # z
|
||||
sw t3, 24(a0) #
|
||||
ret
|
||||
|
||||
|
||||
.type queue_isFull, @function
|
||||
.global queue_isFull
|
||||
queue_isFull:
|
||||
la t0, q # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
mv a0, zero # ret_val = 0
|
||||
li t3, SIZE # t3 = SIZE
|
||||
bne t3, t1, qf # if (num_j == SIZE)
|
||||
addi a0, a0, 1 # ret_val = 1;
|
||||
qf:
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.type queue_isEmpty, @function
|
||||
.global queue_isEmpty
|
||||
queue_isEmpty:
|
||||
la t0, q # loading base address of q
|
||||
lw t1, 8 (t0) # t1 = num_j
|
||||
mv a0, zero # ret_val = 0
|
||||
mv t3, zero # t3 = 0
|
||||
bne t3, t1, qe # if (num_j == 0)
|
||||
addi a0, a0, 1 # ret_val = 1;
|
||||
qe:
|
||||
ret
|
||||
|
||||
|
||||
.type queue_availableWarps, @function
|
||||
.global queue_availableWarps
|
||||
queue_availableWarps:
|
||||
la t0, q # loading base address of q
|
||||
lw t1, 12(t0) # t1 = total_warps
|
||||
lw t2, 16(t0) # t2 = active_warps
|
||||
sltu a0, t2, t1
|
||||
ret
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
|
||||
"elf32-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(main)
|
||||
ENTRY(_start)
|
||||
SECTIONS
|
||||
{
|
||||
. = 0x80000000;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue