Stack smashing when scheduling third warp.

This commit is contained in:
felsabbagh3 2019-02-15 01:45:54 -05:00
parent 337a8669fe
commit 3958beef09
14 changed files with 81019 additions and 482 deletions

View file

@ -4,7 +4,7 @@
#ifndef __DEBUG_H
#define __DEBUG_H
// #define USE_DEBUG 9
#define USE_DEBUG 9
#ifdef USE_DEBUG
#include <iostream>

View file

@ -195,10 +195,10 @@ namespace Harp {
if(mem[address >> 20] == NULL) {
uint8_t* ptr = new uint8_t[1024*1024];
for(uint32_t i = 0;i < 1024*1024;i+=4) {
ptr[i + 0] = 0xFF;
ptr[i + 1] = 0xFF;
ptr[i + 2] = 0xFF;
ptr[i + 3] = 0xFF;
ptr[i + 0] = 0xaa;
ptr[i + 1] = 0xbb;
ptr[i + 2] = 0xcc;
ptr[i + 3] = 0xdd;
}
mem[address >> 20] = ptr;
}

View file

@ -525,6 +525,12 @@ void Instruction::executeOn(Warp &c) {
}
}
break;
case 4:
// JMPRT
nextActiveThreads = 1;
if (!pcSet) nextPc = reg[rsrc[0]];
pcSet = true;
break;
case 5:
// CLONE
// std::cout << "CLONE\n";
@ -545,7 +551,7 @@ void Instruction::executeOn(Warp &c) {
}
break;
default:
cout << "ERROR: Unsupported instruction: " << *this << "\n";
cout << "aERROR: Unsupported instruction: " << *this << "\n";
exit(1);
}
}

View file

@ -101,6 +101,12 @@ void MemoryUnit::ADecoder::write(Addr a, Word w, bool sup, Size wordSize) {
// std::cout << std::hex << "ADecoder::write(Addr " << a << ", w " << w << ", sup " << sup << ", wordSize " << wordSize << "\n";
Word before = m.read(a);
Word new_word = w;
if (a == 0x8000012c)
{
printf("WRITING TO 0x8000012c -> %d\n", w);
}
if (wordSize == 8)
{
r.writeByte(a, &w);

File diff suppressed because it is too large Load diff

View file

@ -9,20 +9,29 @@ void matAddition (unsigned, unsigned);
#include "./lib/lib.h"
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7};
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2};
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#define NUM_WARPS 8
// unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
// unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#define NUM_WARPS 7
#define NUM_THREADS 2
// Entry point of the matAddition test program.
// Prepares the stack and the warp queue, hands matAddition plus the x, y and z
// arrays to createWarps, then spins until the queue reports empty.
// Returns 0.
int main()
{
// Called before anything else; the disassembly in this commit shows it
// reloading sp (lui sp,0x7ffff).
initiate_stack();
// Clear the queue bookkeeping before createWarps enqueues any entries.
queue_initialize();
// Per the disassembly, createWarps enqueues one entry per requested warp
// (NUM_WARPS of them), each carrying NUM_THREADS, the matAddition entry point
// and the three array pointers, and then calls schedule_warps.
createWarps(NUM_WARPS, NUM_THREADS, matAddition, x, y, z);
// Busy-wait: poll until queue_isEmpty() returns non-zero.
// NOTE(review): presumably other warps drain the queue while this spins — confirm.
while(!queue_isEmpty()) {}
// ECALL is defined outside this view (presumably in ./lib/lib.h); it compiles
// to a single `ecall` instruction here.
// NOTE(review): presumably this tells the simulator to stop — confirm.
ECALL;
return 0;
}

View file

@ -9,306 +9,650 @@ Disassembly of section .text:
80000004: 00112623 sw ra,12(sp)
80000008: 00812423 sw s0,8(sp)
8000000c: 01010413 addi s0,sp,16
80000010: 348000ef jal ra,80000358 <initiate_stack>
80000014: 810007b7 lui a5,0x81000
80000018: 00078793 mv a5,a5
8000001c: 81000737 lui a4,0x81000
80000020: 08070713 addi a4,a4,128 # 81000080 <y+0x0>
80000024: 810006b7 lui a3,0x81000
80000028: 04068693 addi a3,a3,64 # 81000040 <y+0xffffffc0>
8000002c: 80000637 lui a2,0x80000
80000030: 05860613 addi a2,a2,88 # 80000058 <y+0xfeffffd8>
80000034: 00200593 li a1,2
80000038: 00800513 li a0,8
8000003c: 1d4000ef jal ra,80000210 <createWarps>
80000040: 00000793 li a5,0
80000044: 00078513 mv a0,a5
80000048: 00c12083 lw ra,12(sp)
8000004c: 00812403 lw s0,8(sp)
80000050: 01010113 addi sp,sp,16
80000054: 00008067 ret
80000010: 468000ef jal ra,80000478 <initiate_stack>
80000014: 484000ef jal ra,80000498 <queue_initialize>
80000018: 810007b7 lui a5,0x81000
8000001c: 00078793 mv a5,a5
80000020: 81000737 lui a4,0x81000
80000024: 19c70713 addi a4,a4,412 # 8100019c <y+0x0>
80000028: 810006b7 lui a3,0x81000
8000002c: 16468693 addi a3,a3,356 # 81000164 <y+0xffffffc8>
80000030: 80000637 lui a2,0x80000
80000034: 07060613 addi a2,a2,112 # 80000070 <y+0xfefffed4>
80000038: 00200593 li a1,2
8000003c: 00700513 li a0,7
80000040: 2e0000ef jal ra,80000320 <createWarps>
80000044: 00000013 nop
80000048: 7e8000ef jal ra,80000830 <queue_isEmpty>
8000004c: 00050793 mv a5,a0
80000050: fe078ce3 beqz a5,80000048 <main+0x48>
80000054: 00000073 ecall
80000058: 00000793 li a5,0
8000005c: 00078513 mv a0,a5
80000060: 00c12083 lw ra,12(sp)
80000064: 00812403 lw s0,8(sp)
80000068: 01010113 addi sp,sp,16
8000006c: 00008067 ret
80000058 <matAddition>:
80000058: fd010113 addi sp,sp,-48
8000005c: 02112623 sw ra,44(sp)
80000060: 02812423 sw s0,40(sp)
80000064: 03010413 addi s0,sp,48
80000068: fca42e23 sw a0,-36(s0)
8000006c: fcb42c23 sw a1,-40(s0)
80000070: 270000ef jal ra,800002e0 <get_1st_arg>
80000074: fea42623 sw a0,-20(s0)
80000078: 290000ef jal ra,80000308 <get_2nd_arg>
8000007c: fea42423 sw a0,-24(s0)
80000080: 2b0000ef jal ra,80000330 <get_3rd_arg>
80000084: fea42223 sw a0,-28(s0)
80000088: fd842783 lw a5,-40(s0)
8000008c: 00179793 slli a5,a5,0x1
80000090: fdc42703 lw a4,-36(s0)
80000094: 00f707b3 add a5,a4,a5
80000098: fef42023 sw a5,-32(s0)
8000009c: fe042783 lw a5,-32(s0)
800000a0: 00279793 slli a5,a5,0x2
800000a4: fec42703 lw a4,-20(s0)
800000a8: 00f707b3 add a5,a4,a5
800000ac: 0007a683 lw a3,0(a5) # 81000000 <y+0xffffff80>
800000b0: fe042783 lw a5,-32(s0)
800000b4: 00279793 slli a5,a5,0x2
800000b8: fe842703 lw a4,-24(s0)
800000bc: 00f707b3 add a5,a4,a5
800000c0: 0007a703 lw a4,0(a5)
800000c4: fe042783 lw a5,-32(s0)
800000c8: 00279793 slli a5,a5,0x2
800000cc: fe442603 lw a2,-28(s0)
800000d0: 00f607b3 add a5,a2,a5
800000d4: 00e68733 add a4,a3,a4
800000d8: 00e7a023 sw a4,0(a5)
800000dc: 00000013 nop
800000e0: 02c12083 lw ra,44(sp)
800000e4: 02812403 lw s0,40(sp)
800000e8: 03010113 addi sp,sp,48
800000ec: 00008067 ret
80000070 <matAddition>:
80000070: fd010113 addi sp,sp,-48
80000074: 02112623 sw ra,44(sp)
80000078: 02812423 sw s0,40(sp)
8000007c: 03010413 addi s0,sp,48
80000080: fca42e23 sw a0,-36(s0)
80000084: fcb42c23 sw a1,-40(s0)
80000088: 378000ef jal ra,80000400 <get_1st_arg>
8000008c: fea42623 sw a0,-20(s0)
80000090: 398000ef jal ra,80000428 <get_2nd_arg>
80000094: fea42423 sw a0,-24(s0)
80000098: 3b8000ef jal ra,80000450 <get_3rd_arg>
8000009c: fea42223 sw a0,-28(s0)
800000a0: fd842783 lw a5,-40(s0)
800000a4: 00179793 slli a5,a5,0x1
800000a8: fdc42703 lw a4,-36(s0)
800000ac: 00f707b3 add a5,a4,a5
800000b0: fef42023 sw a5,-32(s0)
800000b4: fe042783 lw a5,-32(s0)
800000b8: 00279793 slli a5,a5,0x2
800000bc: fec42703 lw a4,-20(s0)
800000c0: 00f707b3 add a5,a4,a5
800000c4: 0007a683 lw a3,0(a5) # 81000000 <y+0xfffffe64>
800000c8: fe042783 lw a5,-32(s0)
800000cc: 00279793 slli a5,a5,0x2
800000d0: fe842703 lw a4,-24(s0)
800000d4: 00f707b3 add a5,a4,a5
800000d8: 0007a703 lw a4,0(a5)
800000dc: fe042783 lw a5,-32(s0)
800000e0: 00279793 slli a5,a5,0x2
800000e4: fe442603 lw a2,-28(s0)
800000e8: 00f607b3 add a5,a2,a5
800000ec: 00e68733 add a4,a3,a4
800000f0: 00e7a023 sw a4,0(a5)
800000f4: 00000013 nop
800000f8: 02c12083 lw ra,44(sp)
800000fc: 02812403 lw s0,40(sp)
80000100: 03010113 addi sp,sp,48
80000104: 00008067 ret
800000f0 <createThreads>:
800000f0: fb010113 addi sp,sp,-80
800000f4: 04812623 sw s0,76(sp)
800000f8: 05212423 sw s2,72(sp)
800000fc: 05312223 sw s3,68(sp)
80000100: 05412023 sw s4,64(sp)
80000104: 03a12e23 sw s10,60(sp)
80000108: 03b12c23 sw s11,56(sp)
8000010c: 05010413 addi s0,sp,80
80000110: fca42623 sw a0,-52(s0)
80000114: fcb42423 sw a1,-56(s0)
80000118: fcc42223 sw a2,-60(s0)
8000011c: fcd42023 sw a3,-64(s0)
80000120: fae42e23 sw a4,-68(s0)
80000124: faf42c23 sw a5,-72(s0)
80000128: fc042903 lw s2,-64(s0)
8000012c: fbc42983 lw s3,-68(s0)
80000130: fb842a03 lw s4,-72(s0)
80000134: fc842583 lw a1,-56(s0)
80000138: 00010f13 mv t5,sp
8000013c: 00100793 li a5,1
80000140: fcf42e23 sw a5,-36(s0)
80000144: 0200006f j 80000164 <createThreads+0x74>
80000148: fdc42503 lw a0,-36(s0)
8000014c: fdc42303 lw t1,-36(s0)
80000150: f0010113 addi sp,sp,-256
80000154: 0003506b 0x3506b
80000158: fdc42783 lw a5,-36(s0)
8000015c: 00178793 addi a5,a5,1
80000160: fcf42e23 sw a5,-36(s0)
80000164: fdc42703 lw a4,-36(s0)
80000168: fcc42783 lw a5,-52(s0)
8000016c: fcf76ee3 bltu a4,a5,80000148 <createThreads+0x58>
80000170: 000f0113 mv sp,t5
80000174: 00000513 li a0,0
80000178: fc442f83 lw t6,-60(s0)
8000017c: fcc42d83 lw s11,-52(s0)
80000180: 01bfe0eb 0x1bfe0eb
80000184: 00000073 ecall
80000188: 00000013 nop
8000018c: 04c12403 lw s0,76(sp)
80000190: 04812903 lw s2,72(sp)
80000194: 04412983 lw s3,68(sp)
80000198: 04012a03 lw s4,64(sp)
8000019c: 03c12d03 lw s10,60(sp)
800001a0: 03812d83 lw s11,56(sp)
800001a4: 05010113 addi sp,sp,80
800001a8: 00008067 ret
80000108 <createThreads>:
80000108: fc010113 addi sp,sp,-64
8000010c: 02812e23 sw s0,60(sp)
80000110: 03a12c23 sw s10,56(sp)
80000114: 03b12a23 sw s11,52(sp)
80000118: 04010413 addi s0,sp,64
8000011c: fca42e23 sw a0,-36(s0)
80000120: fcb42c23 sw a1,-40(s0)
80000124: fcc42a23 sw a2,-44(s0)
80000128: fcd42823 sw a3,-48(s0)
8000012c: fce42623 sw a4,-52(s0)
80000130: fcf42423 sw a5,-56(s0)
80000134: 00050f93 mv t6,a0
80000138: 00058593 mv a1,a1
8000013c: 00068b93 mv s7,a3
80000140: 00070c13 mv s8,a4
80000144: 00078c93 mv s9,a5
80000148: 00010f13 mv t5,sp
8000014c: 00100793 li a5,1
80000150: fef42623 sw a5,-20(s0)
80000154: 0200006f j 80000174 <createThreads+0x6c>
80000158: fec42503 lw a0,-20(s0)
8000015c: fec42303 lw t1,-20(s0)
80000160: 80010113 addi sp,sp,-2048
80000164: 0003506b 0x3506b
80000168: fec42783 lw a5,-20(s0)
8000016c: 00178793 addi a5,a5,1
80000170: fef42623 sw a5,-20(s0)
80000174: 000f8713 mv a4,t6
80000178: fec42783 lw a5,-20(s0)
8000017c: fce7eee3 bltu a5,a4,80000158 <createThreads+0x50>
80000180: 000f0113 mv sp,t5
80000184: 00000513 li a0,0
80000188: fd442f83 lw t6,-44(s0)
8000018c: fdc42d83 lw s11,-36(s0)
80000190: 01bfe0eb 0x1bfe0eb
80000194: 800007b7 lui a5,0x80000
80000198: 20478793 addi a5,a5,516 # 80000204 <y+0xff000068>
8000019c: 00078513 mv a0,a5
800001a0: 0005406b 0x5406b
800001a4: 00000013 nop
800001a8: 03c12403 lw s0,60(sp)
800001ac: 03812d03 lw s10,56(sp)
800001b0: 03412d83 lw s11,52(sp)
800001b4: 04010113 addi sp,sp,64
800001b8: 00008067 ret
800001ac <wspawn>:
800001ac: fd010113 addi sp,sp,-48
800001b0: 02812623 sw s0,44(sp)
800001b4: 03010413 addi s0,sp,48
800001b8: fea42623 sw a0,-20(s0)
800001bc: feb42423 sw a1,-24(s0)
800001c0: fec42223 sw a2,-28(s0)
800001c4: fed42023 sw a3,-32(s0)
800001c8: fce42e23 sw a4,-36(s0)
800001cc: fcf42c23 sw a5,-40(s0)
800001d0: fd842383 lw t2,-40(s0)
800001d4: 800007b7 lui a5,0x80000
800001d8: 0f078793 addi a5,a5,240 # 800000f0 <y+0xff000070>
800001dc: 00078313 mv t1,a5
800001e0: fec42503 lw a0,-20(s0)
800001e4: fe842583 lw a1,-24(s0)
800001e8: fe442783 lw a5,-28(s0)
800001ec: 00078613 mv a2,a5
800001f0: fe042683 lw a3,-32(s0)
800001f4: fdc42703 lw a4,-36(s0)
800001f8: 00038793 mv a5,t2
800001fc: 0003006b 0x3006b
80000200: 00000013 nop
80000204: 02c12403 lw s0,44(sp)
80000208: 03010113 addi sp,sp,48
8000020c: 00008067 ret
800001bc <wspawn>:
800001bc: fd010113 addi sp,sp,-48
800001c0: 02812623 sw s0,44(sp)
800001c4: 03010413 addi s0,sp,48
800001c8: fea42623 sw a0,-20(s0)
800001cc: feb42423 sw a1,-24(s0)
800001d0: fec42223 sw a2,-28(s0)
800001d4: fed42023 sw a3,-32(s0)
800001d8: fce42e23 sw a4,-36(s0)
800001dc: fcf42c23 sw a5,-40(s0)
800001e0: 00078393 mv t2,a5
800001e4: 800007b7 lui a5,0x80000
800001e8: 10878793 addi a5,a5,264 # 80000108 <y+0xfeffff6c>
800001ec: 00078313 mv t1,a5
800001f0: 0003006b 0x3006b
800001f4: 00000013 nop
800001f8: 02c12403 lw s0,44(sp)
800001fc: 03010113 addi sp,sp,48
80000200: 00008067 ret
80000210 <createWarps>:
80000210: fc010113 addi sp,sp,-64
80000214: 02112e23 sw ra,60(sp)
80000218: 02812c23 sw s0,56(sp)
8000021c: 04010413 addi s0,sp,64
80000220: fca42e23 sw a0,-36(s0)
80000224: fcb42c23 sw a1,-40(s0)
80000228: fcc42a23 sw a2,-44(s0)
8000022c: fcd42823 sw a3,-48(s0)
80000230: fce42623 sw a4,-52(s0)
80000234: fcf42423 sw a5,-56(s0)
80000238: 00010f13 mv t5,sp
8000023c: 00100793 li a5,1
80000240: fef42623 sw a5,-20(s0)
80000244: 0300006f j 80000274 <createWarps+0x64>
80000248: 80010113 addi sp,sp,-2048
8000024c: fc842783 lw a5,-56(s0)
80000250: fcc42703 lw a4,-52(s0)
80000254: fd042683 lw a3,-48(s0)
80000258: fd442603 lw a2,-44(s0)
8000025c: fec42583 lw a1,-20(s0)
80000260: fd842503 lw a0,-40(s0)
80000264: f49ff0ef jal ra,800001ac <wspawn>
80000268: fec42783 lw a5,-20(s0)
8000026c: 00178793 addi a5,a5,1
80000270: fef42623 sw a5,-20(s0)
80000274: fec42703 lw a4,-20(s0)
80000278: fdc42783 lw a5,-36(s0)
8000027c: fcf766e3 bltu a4,a5,80000248 <createWarps+0x38>
80000280: 000f0113 mv sp,t5
80000284: fd442603 lw a2,-44(s0)
80000288: fc842783 lw a5,-56(s0)
8000028c: fcc42703 lw a4,-52(s0)
80000290: fd042683 lw a3,-48(s0)
80000294: 00000593 li a1,0
80000298: fd842503 lw a0,-40(s0)
8000029c: e55ff0ef jal ra,800000f0 <createThreads>
800002a0: 00000073 ecall
800002a4: 00000013 nop
800002a8: 03c12083 lw ra,60(sp)
800002ac: 03812403 lw s0,56(sp)
800002b0: 04010113 addi sp,sp,64
800002b4: 00008067 ret
80000204 <reschedule_warps>:
80000204: fd010113 addi sp,sp,-48
80000208: 02112623 sw ra,44(sp)
8000020c: 02812423 sw s0,40(sp)
80000210: 03010413 addi s0,sp,48
80000214: 61c000ef jal ra,80000830 <queue_isEmpty>
80000218: 00050793 mv a5,a0
8000021c: 00078463 beqz a5,80000224 <reschedule_warps+0x20>
80000220: 00000073 ecall
80000224: fd440793 addi a5,s0,-44
80000228: 00078513 mv a0,a5
8000022c: 4b4000ef jal ra,800006e0 <queue_dequeue>
80000230: fdc42783 lw a5,-36(s0)
80000234: 00078113 mv sp,a5
80000238: fd842503 lw a0,-40(s0)
8000023c: fd442583 lw a1,-44(s0)
80000240: fe042603 lw a2,-32(s0)
80000244: fe442683 lw a3,-28(s0)
80000248: fe842703 lw a4,-24(s0)
8000024c: fec42783 lw a5,-20(s0)
80000250: f6dff0ef jal ra,800001bc <wspawn>
80000254: 00000013 nop
80000258: 02c12083 lw ra,44(sp)
8000025c: 02812403 lw s0,40(sp)
80000260: 03010113 addi sp,sp,48
80000264: 00008067 ret
800002b8 <get_wid>:
800002b8: ff010113 addi sp,sp,-16
800002bc: 00812623 sw s0,12(sp)
800002c0: 00912423 sw s1,8(sp)
800002c4: 01010413 addi s0,sp,16
800002c8: 00048793 mv a5,s1
800002cc: 00078513 mv a0,a5
800002d0: 00c12403 lw s0,12(sp)
800002d4: 00812483 lw s1,8(sp)
800002d8: 01010113 addi sp,sp,16
80000268 <schedule_warps>:
80000268: fd010113 addi sp,sp,-48
8000026c: 02112623 sw ra,44(sp)
80000270: 02812423 sw s0,40(sp)
80000274: 03010413 addi s0,sp,48
80000278: 00010f13 mv t5,sp
8000027c: 0340006f j 800002b0 <schedule_warps+0x48>
80000280: fd440793 addi a5,s0,-44
80000284: 00078513 mv a0,a5
80000288: 458000ef jal ra,800006e0 <queue_dequeue>
8000028c: fdc42783 lw a5,-36(s0)
80000290: 00078113 mv sp,a5
80000294: fd842503 lw a0,-40(s0)
80000298: fd442583 lw a1,-44(s0)
8000029c: fe042603 lw a2,-32(s0)
800002a0: fe442683 lw a3,-28(s0)
800002a4: fe842703 lw a4,-24(s0)
800002a8: fec42783 lw a5,-20(s0)
800002ac: f11ff0ef jal ra,800001bc <wspawn>
800002b0: 580000ef jal ra,80000830 <queue_isEmpty>
800002b4: 00050793 mv a5,a0
800002b8: 00079863 bnez a5,800002c8 <schedule_warps+0x60>
800002bc: 5a4000ef jal ra,80000860 <queue_availableWarps>
800002c0: 00050793 mv a5,a0
800002c4: fa079ee3 bnez a5,80000280 <schedule_warps+0x18>
800002c8: 000f0113 mv sp,t5
800002cc: 00000013 nop
800002d0: 02c12083 lw ra,44(sp)
800002d4: 02812403 lw s0,40(sp)
800002d8: 03010113 addi sp,sp,48
800002dc: 00008067 ret
800002e0 <get_1st_arg>:
800002e0: ff010113 addi sp,sp,-16
800002e4: 00812623 sw s0,12(sp)
800002e8: 01212423 sw s2,8(sp)
800002ec: 01010413 addi s0,sp,16
800002f0: 00090793 mv a5,s2
800002f4: 00078513 mv a0,a5
800002f8: 00c12403 lw s0,12(sp)
800002fc: 00812903 lw s2,8(sp)
80000300: 01010113 addi sp,sp,16
80000304: 00008067 ret
800002e0 <sleep>:
800002e0: fe010113 addi sp,sp,-32
800002e4: 00812e23 sw s0,28(sp)
800002e8: 02010413 addi s0,sp,32
800002ec: fe042623 sw zero,-20(s0)
800002f0: 0100006f j 80000300 <sleep+0x20>
800002f4: fec42783 lw a5,-20(s0)
800002f8: 00178793 addi a5,a5,1
800002fc: fef42623 sw a5,-20(s0)
80000300: fec42703 lw a4,-20(s0)
80000304: 000027b7 lui a5,0x2
80000308: 70f78793 addi a5,a5,1807 # 270f <main-0x7fffd8f1>
8000030c: fee7d4e3 bge a5,a4,800002f4 <sleep+0x14>
80000310: 00000013 nop
80000314: 01c12403 lw s0,28(sp)
80000318: 02010113 addi sp,sp,32
8000031c: 00008067 ret
80000308 <get_2nd_arg>:
80000308: ff010113 addi sp,sp,-16
8000030c: 00812623 sw s0,12(sp)
80000310: 01312423 sw s3,8(sp)
80000314: 01010413 addi s0,sp,16
80000318: 00098793 mv a5,s3
8000031c: 00078513 mv a0,a5
80000320: 00c12403 lw s0,12(sp)
80000324: 00812983 lw s3,8(sp)
80000328: 01010113 addi sp,sp,16
8000032c: 00008067 ret
80000320 <createWarps>:
80000320: fb010113 addi sp,sp,-80
80000324: 04112623 sw ra,76(sp)
80000328: 04812423 sw s0,72(sp)
8000032c: 05010413 addi s0,sp,80
80000330: fca42623 sw a0,-52(s0)
80000334: fcb42423 sw a1,-56(s0)
80000338: fcc42223 sw a2,-60(s0)
8000033c: fcd42023 sw a3,-64(s0)
80000340: fae42e23 sw a4,-68(s0)
80000344: faf42c23 sw a5,-72(s0)
80000348: 00010f13 mv t5,sp
8000034c: fe042623 sw zero,-20(s0)
80000350: 05c0006f j 800003ac <createWarps+0x8c>
80000354: ffff0fb7 lui t6,0xffff0
80000358: 01f10133 add sp,sp,t6
8000035c: fec42783 lw a5,-20(s0)
80000360: fcf42823 sw a5,-48(s0)
80000364: fc842783 lw a5,-56(s0)
80000368: fcf42a23 sw a5,-44(s0)
8000036c: 00010793 mv a5,sp
80000370: fcf42c23 sw a5,-40(s0)
80000374: fc442783 lw a5,-60(s0)
80000378: fcf42e23 sw a5,-36(s0)
8000037c: fc042783 lw a5,-64(s0)
80000380: fef42023 sw a5,-32(s0)
80000384: fbc42783 lw a5,-68(s0)
80000388: fef42223 sw a5,-28(s0)
8000038c: fb842783 lw a5,-72(s0)
80000390: fef42423 sw a5,-24(s0)
80000394: fd040793 addi a5,s0,-48
80000398: 00078513 mv a0,a5
8000039c: 158000ef jal ra,800004f4 <queue_enqueue>
800003a0: fec42783 lw a5,-20(s0)
800003a4: 00178793 addi a5,a5,1
800003a8: fef42623 sw a5,-20(s0)
800003ac: fec42703 lw a4,-20(s0)
800003b0: fcc42783 lw a5,-52(s0)
800003b4: faf760e3 bltu a4,a5,80000354 <createWarps+0x34>
800003b8: 000f0113 mv sp,t5
800003bc: eadff0ef jal ra,80000268 <schedule_warps>
800003c0: f21ff0ef jal ra,800002e0 <sleep>
800003c4: 00000013 nop
800003c8: 04c12083 lw ra,76(sp)
800003cc: 04812403 lw s0,72(sp)
800003d0: 05010113 addi sp,sp,80
800003d4: 00008067 ret
80000330 <get_3rd_arg>:
80000330: ff010113 addi sp,sp,-16
80000334: 00812623 sw s0,12(sp)
80000338: 01412423 sw s4,8(sp)
8000033c: 01010413 addi s0,sp,16
80000340: 000a0793 mv a5,s4
80000344: 00078513 mv a0,a5
80000348: 00c12403 lw s0,12(sp)
8000034c: 00812a03 lw s4,8(sp)
80000350: 01010113 addi sp,sp,16
80000354: 00008067 ret
800003d8 <get_wid>:
800003d8: ff010113 addi sp,sp,-16
800003dc: 00812623 sw s0,12(sp)
800003e0: 01712423 sw s7,8(sp)
800003e4: 01010413 addi s0,sp,16
800003e8: 000b8793 mv a5,s7
800003ec: 00078513 mv a0,a5
800003f0: 00c12403 lw s0,12(sp)
800003f4: 00812b83 lw s7,8(sp)
800003f8: 01010113 addi sp,sp,16
800003fc: 00008067 ret
80000358 <initiate_stack>:
80000358: ff010113 addi sp,sp,-16
8000035c: 00812623 sw s0,12(sp)
80000360: 01010413 addi s0,sp,16
80000364: 7ffff137 lui sp,0x7ffff
80000368: 00000013 nop
8000036c: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
80000370: 01010113 addi sp,sp,16
80000374: 00008067 ret
80000400 <get_1st_arg>:
80000400: ff010113 addi sp,sp,-16
80000404: 00812623 sw s0,12(sp)
80000408: 01712423 sw s7,8(sp)
8000040c: 01010413 addi s0,sp,16
80000410: 000b8793 mv a5,s7
80000414: 00078513 mv a0,a5
80000418: 00c12403 lw s0,12(sp)
8000041c: 00812b83 lw s7,8(sp)
80000420: 01010113 addi sp,sp,16
80000424: 00008067 ret
80000428 <get_2nd_arg>:
80000428: ff010113 addi sp,sp,-16
8000042c: 00812623 sw s0,12(sp)
80000430: 01812423 sw s8,8(sp)
80000434: 01010413 addi s0,sp,16
80000438: 000c0793 mv a5,s8
8000043c: 00078513 mv a0,a5
80000440: 00c12403 lw s0,12(sp)
80000444: 00812c03 lw s8,8(sp)
80000448: 01010113 addi sp,sp,16
8000044c: 00008067 ret
80000450 <get_3rd_arg>:
80000450: ff010113 addi sp,sp,-16
80000454: 00812623 sw s0,12(sp)
80000458: 01912423 sw s9,8(sp)
8000045c: 01010413 addi s0,sp,16
80000460: 000c8793 mv a5,s9
80000464: 00078513 mv a0,a5
80000468: 00c12403 lw s0,12(sp)
8000046c: 00812c83 lw s9,8(sp)
80000470: 01010113 addi sp,sp,16
80000474: 00008067 ret
80000478 <initiate_stack>:
80000478: ff010113 addi sp,sp,-16
8000047c: 00812623 sw s0,12(sp)
80000480: 01010413 addi s0,sp,16
80000484: 7ffff137 lui sp,0x7ffff
80000488: 00000013 nop
8000048c: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
80000490: 01010113 addi sp,sp,16
80000494: 00008067 ret
80000498 <queue_initialize>:
80000498: ff010113 addi sp,sp,-16
8000049c: 00812623 sw s0,12(sp)
800004a0: 01010413 addi s0,sp,16
800004a4: 810007b7 lui a5,0x81000
800004a8: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800004ac: 1007ac23 sw zero,280(a5)
800004b0: 810007b7 lui a5,0x81000
800004b4: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800004b8: 1007ae23 sw zero,284(a5)
800004bc: 810007b7 lui a5,0x81000
800004c0: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800004c4: 1207a023 sw zero,288(a5)
800004c8: 810007b7 lui a5,0x81000
800004cc: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800004d0: 00700713 li a4,7
800004d4: 12e7a223 sw a4,292(a5)
800004d8: 810007b7 lui a5,0x81000
800004dc: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800004e0: 1207a423 sw zero,296(a5)
800004e4: 00000013 nop
800004e8: 00c12403 lw s0,12(sp)
800004ec: 01010113 addi sp,sp,16
800004f0: 00008067 ret
800004f4 <queue_enqueue>:
800004f4: fe010113 addi sp,sp,-32
800004f8: 00812e23 sw s0,28(sp)
800004fc: 02010413 addi s0,sp,32
80000500: fea42623 sw a0,-20(s0)
80000504: 810007b7 lui a5,0x81000
80000508: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
8000050c: 1207a783 lw a5,288(a5)
80000510: 00178713 addi a4,a5,1
80000514: 810007b7 lui a5,0x81000
80000518: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
8000051c: 12e7a023 sw a4,288(a5)
80000520: 810007b7 lui a5,0x81000
80000524: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000528: 11c7a703 lw a4,284(a5)
8000052c: fec42783 lw a5,-20(s0)
80000530: 0007a683 lw a3,0(a5)
80000534: 81000637 lui a2,0x81000
80000538: 00070793 mv a5,a4
8000053c: 00379793 slli a5,a5,0x3
80000540: 40e787b3 sub a5,a5,a4
80000544: 00279793 slli a5,a5,0x2
80000548: 03860713 addi a4,a2,56 # 81000038 <y+0xfffffe9c>
8000054c: 00e787b3 add a5,a5,a4
80000550: 00d7a023 sw a3,0(a5)
80000554: 810007b7 lui a5,0x81000
80000558: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
8000055c: 11c7a703 lw a4,284(a5)
80000560: fec42783 lw a5,-20(s0)
80000564: 0047a683 lw a3,4(a5)
80000568: 810007b7 lui a5,0x81000
8000056c: 03878613 addi a2,a5,56 # 81000038 <y+0xfffffe9c>
80000570: 00070793 mv a5,a4
80000574: 00379793 slli a5,a5,0x3
80000578: 40e787b3 sub a5,a5,a4
8000057c: 00279793 slli a5,a5,0x2
80000580: 00f607b3 add a5,a2,a5
80000584: 00d7a223 sw a3,4(a5)
80000588: 810007b7 lui a5,0x81000
8000058c: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000590: 11c7a703 lw a4,284(a5)
80000594: fec42783 lw a5,-20(s0)
80000598: 0087a683 lw a3,8(a5)
8000059c: 810007b7 lui a5,0x81000
800005a0: 03878613 addi a2,a5,56 # 81000038 <y+0xfffffe9c>
800005a4: 00070793 mv a5,a4
800005a8: 00379793 slli a5,a5,0x3
800005ac: 40e787b3 sub a5,a5,a4
800005b0: 00279793 slli a5,a5,0x2
800005b4: 00f607b3 add a5,a2,a5
800005b8: 00d7a423 sw a3,8(a5)
800005bc: 810007b7 lui a5,0x81000
800005c0: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800005c4: 11c7a703 lw a4,284(a5)
800005c8: fec42783 lw a5,-20(s0)
800005cc: 00c7a683 lw a3,12(a5)
800005d0: 810007b7 lui a5,0x81000
800005d4: 03878613 addi a2,a5,56 # 81000038 <y+0xfffffe9c>
800005d8: 00070793 mv a5,a4
800005dc: 00379793 slli a5,a5,0x3
800005e0: 40e787b3 sub a5,a5,a4
800005e4: 00279793 slli a5,a5,0x2
800005e8: 00f607b3 add a5,a2,a5
800005ec: 00d7a623 sw a3,12(a5)
800005f0: 810007b7 lui a5,0x81000
800005f4: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800005f8: 11c7a703 lw a4,284(a5)
800005fc: fec42783 lw a5,-20(s0)
80000600: 0107a683 lw a3,16(a5)
80000604: 81000637 lui a2,0x81000
80000608: 00070793 mv a5,a4
8000060c: 00379793 slli a5,a5,0x3
80000610: 40e787b3 sub a5,a5,a4
80000614: 00279793 slli a5,a5,0x2
80000618: 03860713 addi a4,a2,56 # 81000038 <y+0xfffffe9c>
8000061c: 00e787b3 add a5,a5,a4
80000620: 00d7a823 sw a3,16(a5)
80000624: 810007b7 lui a5,0x81000
80000628: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
8000062c: 11c7a703 lw a4,284(a5)
80000630: fec42783 lw a5,-20(s0)
80000634: 0147a683 lw a3,20(a5)
80000638: 810007b7 lui a5,0x81000
8000063c: 03878613 addi a2,a5,56 # 81000038 <y+0xfffffe9c>
80000640: 00070793 mv a5,a4
80000644: 00379793 slli a5,a5,0x3
80000648: 40e787b3 sub a5,a5,a4
8000064c: 00279793 slli a5,a5,0x2
80000650: 00f607b3 add a5,a2,a5
80000654: 00d7aa23 sw a3,20(a5)
80000658: 810007b7 lui a5,0x81000
8000065c: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000660: 11c7a703 lw a4,284(a5)
80000664: fec42783 lw a5,-20(s0)
80000668: 0187a683 lw a3,24(a5)
8000066c: 810007b7 lui a5,0x81000
80000670: 03878613 addi a2,a5,56 # 81000038 <y+0xfffffe9c>
80000674: 00070793 mv a5,a4
80000678: 00379793 slli a5,a5,0x3
8000067c: 40e787b3 sub a5,a5,a4
80000680: 00279793 slli a5,a5,0x2
80000684: 00f607b3 add a5,a2,a5
80000688: 00d7ac23 sw a3,24(a5)
8000068c: 810007b7 lui a5,0x81000
80000690: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000694: 11c7a783 lw a5,284(a5)
80000698: 00178713 addi a4,a5,1
8000069c: 00900793 li a5,9
800006a0: 02e7e263 bltu a5,a4,800006c4 <queue_enqueue+0x1d0>
800006a4: 810007b7 lui a5,0x81000
800006a8: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800006ac: 11c7a783 lw a5,284(a5)
800006b0: 00178713 addi a4,a5,1
800006b4: 810007b7 lui a5,0x81000
800006b8: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800006bc: 10e7ae23 sw a4,284(a5)
800006c0: 0100006f j 800006d0 <queue_enqueue+0x1dc>
800006c4: 810007b7 lui a5,0x81000
800006c8: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800006cc: 1007ae23 sw zero,284(a5)
800006d0: 00000013 nop
800006d4: 01c12403 lw s0,28(sp)
800006d8: 02010113 addi sp,sp,32
800006dc: 00008067 ret
800006e0 <queue_dequeue>:
800006e0: fd010113 addi sp,sp,-48
800006e4: 02812623 sw s0,44(sp)
800006e8: 03010413 addi s0,sp,48
800006ec: fca42e23 sw a0,-36(s0)
800006f0: 810007b7 lui a5,0x81000
800006f4: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
800006f8: 1207a783 lw a5,288(a5)
800006fc: fff78713 addi a4,a5,-1
80000700: 810007b7 lui a5,0x81000
80000704: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000708: 12e7a023 sw a4,288(a5)
8000070c: 810007b7 lui a5,0x81000
80000710: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000714: 1187a703 lw a4,280(a5)
80000718: 00070793 mv a5,a4
8000071c: 00379793 slli a5,a5,0x3
80000720: 40e787b3 sub a5,a5,a4
80000724: 00279793 slli a5,a5,0x2
80000728: 81000737 lui a4,0x81000
8000072c: 03870713 addi a4,a4,56 # 81000038 <y+0xfffffe9c>
80000730: 00e787b3 add a5,a5,a4
80000734: fef42623 sw a5,-20(s0)
80000738: 810007b7 lui a5,0x81000
8000073c: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000740: 1187a783 lw a5,280(a5)
80000744: 00178713 addi a4,a5,1
80000748: 00900793 li a5,9
8000074c: 02e7e263 bltu a5,a4,80000770 <queue_dequeue+0x90>
80000750: 810007b7 lui a5,0x81000
80000754: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000758: 1187a783 lw a5,280(a5)
8000075c: 00178713 addi a4,a5,1
80000760: 810007b7 lui a5,0x81000
80000764: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000768: 10e7ac23 sw a4,280(a5)
8000076c: 0100006f j 8000077c <queue_dequeue+0x9c>
80000770: 810007b7 lui a5,0x81000
80000774: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000778: 1007ac23 sw zero,280(a5)
8000077c: fec42783 lw a5,-20(s0)
80000780: 0007a703 lw a4,0(a5)
80000784: fdc42783 lw a5,-36(s0)
80000788: 00e7a023 sw a4,0(a5)
8000078c: fec42783 lw a5,-20(s0)
80000790: 0047a703 lw a4,4(a5)
80000794: fdc42783 lw a5,-36(s0)
80000798: 00e7a223 sw a4,4(a5)
8000079c: fec42783 lw a5,-20(s0)
800007a0: 0087a703 lw a4,8(a5)
800007a4: fdc42783 lw a5,-36(s0)
800007a8: 00e7a423 sw a4,8(a5)
800007ac: fec42783 lw a5,-20(s0)
800007b0: 00c7a703 lw a4,12(a5)
800007b4: fdc42783 lw a5,-36(s0)
800007b8: 00e7a623 sw a4,12(a5)
800007bc: fec42783 lw a5,-20(s0)
800007c0: 0107a703 lw a4,16(a5)
800007c4: fdc42783 lw a5,-36(s0)
800007c8: 00e7a823 sw a4,16(a5)
800007cc: fec42783 lw a5,-20(s0)
800007d0: 0147a703 lw a4,20(a5)
800007d4: fdc42783 lw a5,-36(s0)
800007d8: 00e7aa23 sw a4,20(a5)
800007dc: fec42783 lw a5,-20(s0)
800007e0: 0187a703 lw a4,24(a5)
800007e4: fdc42783 lw a5,-36(s0)
800007e8: 00e7ac23 sw a4,24(a5)
800007ec: 00000013 nop
800007f0: 02c12403 lw s0,44(sp)
800007f4: 03010113 addi sp,sp,48
800007f8: 00008067 ret
800007fc <queue_isFull>:
800007fc: ff010113 addi sp,sp,-16
80000800: 00812623 sw s0,12(sp)
80000804: 01010413 addi s0,sp,16
80000808: 810007b7 lui a5,0x81000
8000080c: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000810: 1207a783 lw a5,288(a5)
80000814: ff678793 addi a5,a5,-10
80000818: 0017b793 seqz a5,a5
8000081c: 0ff7f793 andi a5,a5,255
80000820: 00078513 mv a0,a5
80000824: 00c12403 lw s0,12(sp)
80000828: 01010113 addi sp,sp,16
8000082c: 00008067 ret
80000830 <queue_isEmpty>:
80000830: ff010113 addi sp,sp,-16
80000834: 00812623 sw s0,12(sp)
80000838: 01010413 addi s0,sp,16
8000083c: 810007b7 lui a5,0x81000
80000840: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000844: 1207a783 lw a5,288(a5)
80000848: 0017b793 seqz a5,a5
8000084c: 0ff7f793 andi a5,a5,255
80000850: 00078513 mv a0,a5
80000854: 00c12403 lw s0,12(sp)
80000858: 01010113 addi sp,sp,16
8000085c: 00008067 ret
80000860 <queue_availableWarps>:
80000860: ff010113 addi sp,sp,-16
80000864: 00812623 sw s0,12(sp)
80000868: 01010413 addi s0,sp,16
8000086c: 810007b7 lui a5,0x81000
80000870: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000874: 1287a703 lw a4,296(a5)
80000878: 810007b7 lui a5,0x81000
8000087c: 03878793 addi a5,a5,56 # 81000038 <y+0xfffffe9c>
80000880: 1247a783 lw a5,292(a5)
80000884: 00f737b3 sltu a5,a4,a5
80000888: 0ff7f793 andi a5,a5,255
8000088c: 00078513 mv a0,a5
80000890: 00c12403 lw s0,12(sp)
80000894: 01010113 addi sp,sp,16
80000898: 00008067 ret
Disassembly of section .bss:
81000000 <z>:
...
81000038 <q>:
...
Disassembly of section .data:
81000040 <x>:
81000040: 0001 nop
81000042: 0000 unimp
81000044: 0001 nop
81000046: 0000 unimp
81000048: 0006 c.slli zero,0x1
8100004a: 0000 unimp
8100004c: 0000 unimp
8100004e: 0000 unimp
81000050: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
81000054: 0001 nop
81000056: 0000 unimp
81000058: 0001 nop
8100005a: 0000 unimp
8100005c: 0002 c.slli64 zero
8100005e: 0000 unimp
81000060: 0000 unimp
81000062: 0000 unimp
81000064: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
81000068: 0006 c.slli zero,0x1
8100006a: 0000 unimp
8100006c: 00000007 0x7
81000070: 0005 c.nop 1
81000072: 0000 unimp
81000074: 00000007 0x7
81000078: 00000007 0x7
8100007c: 0009 c.nop 2
...
81000164 <x>:
81000164: 0001 nop
81000166: 0000 unimp
81000168: 0001 nop
8100016a: 0000 unimp
8100016c: 0006 c.slli zero,0x1
8100016e: 0000 unimp
81000170: 0000 unimp
81000172: 0000 unimp
81000174: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
81000178: 0001 nop
8100017a: 0000 unimp
8100017c: 0001 nop
8100017e: 0000 unimp
81000180: 0002 c.slli64 zero
81000182: 0000 unimp
81000184: 0000 unimp
81000186: 0000 unimp
81000188: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
8100018c: 0006 c.slli zero,0x1
8100018e: 0000 unimp
81000190: 00000007 0x7
81000194: 0005 c.nop 1
81000196: 0000 unimp
81000198: 00000007 0x7
81000080 <y>:
81000080: 0000 unimp
81000082: 0000 unimp
81000084: 0002 c.slli64 zero
81000086: 0000 unimp
81000088: 0002 c.slli64 zero
8100008a: 0000 unimp
8100008c: 0000 unimp
8100008e: 0000 unimp
81000090: 0005 c.nop 1
81000092: 0000 unimp
81000094: 0000 unimp
81000096: 0000 unimp
81000098: 0001 nop
8100009a: 0000 unimp
8100009c: 0001 nop
8100009e: 0000 unimp
810000a0: 0004 0x4
810000a2: 0000 unimp
810000a4: 0002 c.slli64 zero
8100019c <y>:
8100019c: 0000 unimp
8100019e: 0000 unimp
810001a0: 0002 c.slli64 zero
810001a2: 0000 unimp
810001a4: 0002 c.slli64 zero
810001a6: 0000 unimp
810001a8: 0000 unimp
810001aa: 0000 unimp
810001ac: 0005 c.nop 1
810001ae: 0000 unimp
810001b0: 0000 unimp
810001b2: 0000 unimp
810001b4: 0001 nop
810001b6: 0000 unimp
810001b8: 0001 nop
810001ba: 0000 unimp
810001bc: 0004 0x4
810001be: 0000 unimp
810001c0: 0002 c.slli64 zero
...
810000ae: 0000 unimp
810000b0: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
810000b4: 0002 c.slli64 zero
810000b6: 0000 unimp
810000b8: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
810000bc: 0002 c.slli64 zero
810001ca: 0000 unimp
810001cc: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
810001d0: 0002 c.slli64 zero
...
Disassembly of section .comment:

Binary file not shown.

View file

@ -1,68 +1,149 @@
:0200000480007A
:10000000130101FF232611002324810013040101A1
:10001000EF008034B707008193870700370700811E
:1000200013070708B7060081938606043706008089
:10003000130686059305200013058000EF00401D80
:1000400093070000138507008320C100032481006B
:100050001301010167800000130101FD2326110235
:100060002324810213040103232EA4FC232CB4FCBB
:10007000EF0000272326A4FEEF0000292324A4FE7E
:10008000EF00002B2322A4FE832784FD9397170003
:100090000327C4FDB307F7002320F4FE832704FEE3
:1000A000939727000327C4FEB307F70083A6070032
:1000B000832704FE93972700032784FEB307F700E6
:1000C00003A70700832704FE93972700032644FE17
:1000D000B307F6003387E60023A0E7001300000013
:1000E0008320C10203248102130101036780000001
:1000F000130101FB2326810423242105232231053A
:1001000023204105232EA103232CB1031304010551
:100110002326A4FC2324B4FC2322C4FC2320D4FCE7
:10012000232EE4FA232CF4FA032904FC8329C4FBCC
:10013000032A84FB832584FC130F0100930710001E
:10014000232EF4FC6F0000020325C4FD0323C4FD2D
:10015000130101F06B5003008327C4FD9387170040
:10016000232EF4FC0327C4FD8327C4FCE36EF7FCB5
:1001700013010F0013050000832F44FC832DC4FCE2
:10018000EBE0BF0173000000130000000324C10472
:100190000329810483294104032A0104032DC10397
:1001A000832D81031301010567800000130101FD08
:1001B00023268102130401032326A4FE2324B4FE74
:1001C0002322C4FE2320D4FE232EE4FC232CF4FCA3
:1001D000832384FDB70700809387070F13830700ED
:1001E0000325C4FE832584FE832744FE138607006F
:1001F000832604FE0327C4FD938703006B000300DE
:10020000130000000324C1021301010367800000F2
:10021000130101FC232E1102232C8102130401047B
:10022000232EA4FC232CB4FC232AC4FC2328D4FCB6
:100230002326E4FC2324F4FC130F01009307100091
:100240002326F4FE6F00000313010180832784FC42
:100250000327C4FC832604FD032644FD8325C4FE36
:10026000032584FDEFF09FF48327C4FE93871700D6
:100270002326F4FE0327C4FE8327C4FDE366F7FCB0
:1002800013010F00032644FD832784FC0327C4FCCD
:10029000832604FD93050000032584FDEFF05FE550
:1002A00073000000130000008320C10303248103B6
:1002B0001301010467800000130101FF2326810060
:1002C0002324910013040101938704001385070080
:1002D0000324C10083248100130101016780000011
:1002E000130101FF232681002324210113040101AE
:1002F00093070900138507000324C1000329810027
:100300001301010167800000130101FF2326810012
:100310002324310113040101938709001385070089
:100320000324C100832981001301010167800000BB
:10033000130101FF2326810023244101130401013D
:1003400093070A00138507000324C100032A8100D4
:100350001301010167800000130101FF23268100C2
:100360001304010137F1FF7F130000000324C100D3
:08037000130101016780000088
:10001000EF008046EF004048B70700819387070054
:10002000370700811307C719B70600819386461664
:10003000370600801306060793052000130570009D
:10004000EF00002E13000000EF00807E93070500F4
:10005000E38C07FE73000000930700001385070080
:100060008320C10003248100130101016780000087
:10007000130101FD2326110223248102130401032D
:10008000232EA4FC232CB4FCEF0080372326A4FEEF
:10009000EF0080392324A4FEEF00803B2322A4FE3E
:1000A000832784FD939717000327C4FDB307F70048
:1000B0002320F4FE832704FE939727000327C4FE22
:1000C000B307F70083A60700832704FE9397270052
:1000D000032784FEB307F70003A70700832704FE66
:1000E00093972700032644FEB307F6003387E60004
:1000F00023A0E700130000008320C1020324810233
:100100001301010367800000130101FC232E81020B
:10011000232CA103232AB10313040104232EA4FCDE
:10012000232CB4FC232AC4FC2328D4FC2326E4FC7F
:100130002324F4FC930F050093850500938B0600A0
:10014000130C0700938C0700130F01009307100096
:100150002326F4FE6F0000020325C4FE0323C4FE21
:10016000130101806B5003008327C4FE938717009F
:100170002326F4FE13870F008327C4FEE3EEE7FC7B
:1001800013010F0013050000832F44FD832DC4FDD0
:10019000EBE0BF01B7070080938747201385070076
:1001A0006B400500130000000324C103032D8103ED
:1001B000832D41031301010467800000130101FD39
:1001C00023268102130401032326A4FE2324B4FE64
:1001D0002322C4FE2320D4FE232EE4FC232CF4FC93
:1001E00093830700B7070080938787101383070066
:1001F0006B000300130000000324C102130101037C
:1002000067800000130101FD2326110223248102CF
:1002100013040103EF00C061930705006384070026
:1002200073000000930744FD13850700EF00404B67
:100230008327C4FD13810700032584FD832544FD26
:10024000032604FE832644FE032784FE8327C4FE80
:10025000EFF0DFF6130000008320C10203248102C7
:100260001301010367800000130101FD2326110221
:100270002324810213040103130F01006F004003C4
:10028000930744FD13850700EF0080458327C4FDD5
:1002900013810700032584FD832544FD032604FE06
:1002A000832644FE032784FE8327C4FEEFF01FF15C
:1002B000EF0000589307050063980700EF00405ACD
:1002C00093070500E39E07FA13010F0013000000D7
:1002D0008320C1020324810213010103678000000F
:1002E000130101FE232E810013040102232604FEC4
:1002F0006F0000018327C4FE938717002326F4FEB6
:100300000327C4FEB72700009387F770E3D4E7FE06
:10031000130000000324C1011301010267800000E3
:10032000130101FB23261104232481041304010576
:100330002326A4FC2324B4FC2322C4FC2320D4FCC5
:10034000232EE4FA232CF4FA130F0100232604FED3
:100350006F00C005B70FFFFF3301F1018327C4FE13
:100360002328F4FC832784FC232AF4FC9307010050
:10037000232CF4FC832744FC232EF4FC832704FC69
:100380002320F4FE8327C4FB2322F4FE832784FB6F
:100390002324F4FE930704FD13850700EF00801566
:1003A0008327C4FE938717002326F4FE0327C4FE89
:1003B0008327C4FCE360F7FA13010F00EFF0DFEAD4
:1003C000EFF01FF2130000008320C1040324810416
:1003D0001301010567800000130101FF232681003E
:1003E000232471011304010193870B001385070077
:1003F0000324C100832B81001301010167800000E9
:10040000130101FF2326810023247101130401013C
:1004100093870B00138507000324C100832B810001
:100420001301010167800000130101FF23268100F1
:10043000232481011304010193070C001385070095
:100440000324C100032C8100130101016780000017
:10045000130101FF232681002324910113040101CC
:1004600093870C00138507000324C100832C8100AF
:100470001301010167800000130101FF23268100A1
:100480001304010137F1FF7F130000000324C100B2
:100490001301010167800000130101FF2326810081
:1004A00013040101B70700819387870323AC07106A
:1004B000B70700819387870323AE0710B707008132
:1004C0009387870323A00712B707008193878703C9
:1004D0001307700023A2E712B707008193878703F1
:1004E00023A40712130000000324C100130101011B
:1004F00067800000130101FE232E81001304010216
:100500002326A4FEB70700819387870383A70712DA
:1005100013871700B70700819387870323A0E7128B
:10052000B70700819387870303A7C7118327C4FEFA
:1005300083A60700370600819307070093973700CB
:10054000B387E7409397270013078603B387E70035
:1005500023A0D700B70700819387870303A7C7119C
:100560008327C4FE83A64700B7070081138687034D
:100570009307070093973700B387E74093972700C7
:10058000B307F60023A2D700B7070081938787033C
:1005900003A7C7118327C4FE83A68700B70700817E
:1005A000138687039307070093973700B387E740C5
:1005B00093972700B307F60023A4D700B70700815D
:1005C0009387870303A7C7118327C4FE83A6C700A9
:1005D000B7070081138687039307070093973700B7
:1005E000B387E74093972700B307F60023A6D70009
:1005F000B70700819387870303A7C7118327C4FE2A
:1006000083A60701370600819307070093973700F9
:10061000B387E7409397270013078603B387E70064
:1006200023A8D700B70700819387870303A7C711C3
:100630008327C4FE83A64701B7070081138687037B
:100640009307070093973700B387E74093972700F6
:10065000B307F60023AAD700B70700819387870363
:1006600003A7C7118327C4FE83A68701B7070081AC
:10067000138687039307070093973700B387E740F4
:1006800093972700B307F60023ACD700B707008184
:100690009387870383A7C7111387170093079000D9
:1006A00063E2E702B70700819387870383A7C71137
:1006B00013871700B70700819387870323AEE710DE
:1006C0006F000001B70700819387870323AE0710EF
:1006D000130000000324C101130101026780000020
:1006E000130101FD2326810213040103232EA4FC20
:1006F000B70700819387870383A707121387F7FF44
:10070000B70700819387870323A0E712B70700810B
:100710009387870303A787119307070093973700F1
:10072000B387E740939727003707008113078703B4
:10073000B387E7002326F4FEB7070081938787037A
:1007400083A78711138717009307900063E2E702DE
:10075000B70700819387870383A787111387170043
:10076000B70700819387870323ACE7106F00000170
:10077000B70700819387870323AC07108327C4FE44
:1007800003A707008327C4FD23A0E7008327C4FE37
:1007900003A747008327C4FD23A2E7008327C4FEE5
:1007A00003A787008327C4FD23A4E7008327C4FE93
:1007B00003A7C7008327C4FD23A6E7008327C4FE41
:1007C00003A707018327C4FD23A8E7008327C4FEEE
:1007D00003A747018327C4FD23AAE7008327C4FE9C
:1007E00003A787018327C4FD23ACE70013000000A3
:1007F0000324C1021301010367800000130101FFFC
:100800002326810013040101B70700819387870322
:1008100083A70712938767FF93B7170093F7F70F24
:10082000138507000324C100130101016780000044
:10083000130101FF2326810013040101B707008182
:100840009387870383A7071293B7170093F7F70FD0
:10085000138507000324C100130101016780000014
:10086000130101FF2326810013040101B707008152
:100870009387870303A78712B707008193878703AE
:1008800083A74712B337F70093F7F70F13850700D5
:0C0890000324C100130101016780000077
:02000004810079
:1000400001000000010000000600000000000000A8
:100050000300000001000000010000000200000099
:100060000000000003000000060000000700000080
:100070000500000007000000070000000900000064
:10008000000000000200000002000000000000006C
:100090000500000000000000010000000100000059
:1000A000040000000200000000000000000000004A
:1000B0000300000002000000030000000200000036
:100164000100000001000000060000000000000083
:100174000300000001000000010000000200000074
:10018400000000000300000006000000070000005B
:10019400050000000700000000000000020000004D
:1001A4000200000000000000050000000000000044
:1001B4000100000001000000040000000200000033
:1001C4000000000000000000030000000200000026
:040000058000000077
:00000001FF

View file

@ -1,3 +1,3 @@
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex
/opt/riscv/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c ./lib/queue.c -o gpgpu_test.elf
/opt/riscv/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump
/opt/riscv/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex

View file

@ -1,23 +1,25 @@
#include "lib.h"
// namespace Sphinx
// {
void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
{
register unsigned *xx asm("s2") = x_ptr;
register unsigned *yy asm("s3") = y_ptr;
register unsigned *zz asm("s4") = z_ptr;
register unsigned wid_ asm("a1") = wid;
asm __volatile__("mv t6, a0");
asm __volatile__("mv a1, a1");
asm __volatile__("mv s7, a3");
asm __volatile__("mv s8, a4");
asm __volatile__("mv s9, a5");
asm __volatile__("addi t5, sp, 0");
for (unsigned i = 1; i < num_threads; i++)
register unsigned num_threads_ asm("t6");
for (unsigned i = 1; i < num_threads_; i++)
{
register unsigned cur_tid asm("a0") = i;
register unsigned not_sure asm("t1") = i;
asm __volatile__("addi sp, sp, -256");
asm __volatile__("addi sp, sp, -2048");
CLONE;
}
asm __volatile__("addi sp, t5, 0");
@ -32,75 +34,177 @@ void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr, unsig
JALRS;
ECALL;
register unsigned jump_dest asm("a0") = (unsigned) reschedule_warps;
JMPRT;
// // register unsigned *xx asm("s7") = x_ptr;
// // register unsigned *yy asm("s8") = y_ptr;
// // register unsigned *zz asm("s9") = z_ptr;
// register unsigned wid_ asm("a1") = wid;
// asm __volatile__("addi t5, sp, 0");
// for (unsigned i = 1; i < num_threads; i++)
// {
// register unsigned cur_tid asm("a0") = i;
// register unsigned not_sure asm("t1") = i;
// asm __volatile__("addi sp, sp, -256");
// CLONE;
// }
// asm __volatile__("addi sp, t5, 0");
// register unsigned cur_tid asm("a0") = 0;
// // jalis TO FUNC
// register unsigned num_lanes asm("t6") = func_addr;
// register unsigned link asm("s11") = num_threads;
// JALRS;
// register unsigned jump_dest asm("a0") = (unsigned) reschedule_warps;
// JMPRT;
}
void wspawn(unsigned num_threads, unsigned wid, FUNC, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
void wspawn(unsigned num_threads, unsigned wid, unsigned func, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
{
register unsigned *tzz asm("t2") = z_ptr;
asm __volatile__("mv t2, a5");
// asm __volatile__("mv t1, a5");
register unsigned func_add asm("t1") = (unsigned) &createThreads;
register unsigned n_threads asm("a0") = num_threads;
register unsigned wwid asm("a1") = wid;
register unsigned ffunc asm("a2") = (unsigned) func;
register unsigned *xx asm("a3") = x_ptr;
register unsigned *yy asm("a4") = y_ptr;
register unsigned *zz asm("a5") = tzz;
WSPAWN; // THIS SHOULD COPY THE CSR REGISTERS TO THE NEW WARP
// register unsigned *tzz asm("t2") = z_ptr;
// register unsigned func_add asm("t1") = (unsigned) &createThreads;
// register unsigned n_threads asm("a0") = num_threads;
// register unsigned wwid asm("a1") = wid;
// register unsigned ffunc asm("a2") = func;
// register unsigned *xx asm("a3") = x_ptr;
// register unsigned *yy asm("a4") = y_ptr;
// register unsigned *zz asm("a5") = tzz;
// WSPAWN; // THIS SHOULD COPY THE CSR REGISTERS TO THE NEW WARP
}
// Takes the next job off the queue and launches it.
// When the queue is empty an ECALL is issued first. Otherwise one Job is
// dequeued, sp is pointed at that job's base_sp, and wspawn is called with
// the job's fields.
void reschedule_warps()
{
if (queue_isEmpty())
{
// Nothing left to run.
// NOTE(review): presumably ECALL ends this warp. If it can return, the
// dequeue below runs on an empty queue -- confirm.
ECALL;
}
Job j;
queue_dequeue(&j);
// Move onto the stack recorded for this job before spawning.
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
}
// Launches queued jobs for as long as the queue is non-empty and
// queue_availableWarps() reports capacity.
// sp is copied to t5 on entry and copied back from t5 on exit; inside the
// loop sp is moved to each job's base_sp before wspawn is called.
// NOTE(review): t5 is a caller-saved temporary in the standard RISC-V calling
// convention and is not listed as a clobber, so the final restore relies on
// no callee overwriting t5 -- confirm.
// NOTE(review): nothing in this function changes q.active_warps, so the
// capacity check may never turn false -- verify where it is meant to be updated.
void schedule_warps()
{
// Remember the caller's stack pointer.
asm __volatile__("mv t5, sp");
while (!queue_isEmpty() && queue_availableWarps())
{
Job j;
queue_dequeue(&j);
// Switch to the stack recorded for this job before spawning.
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
wspawn(j.n_threads, j.wid, j.func_ptr, j.x, j.y, j.z);
}
// Put the caller's stack pointer back.
asm __volatile__("mv sp, t5");
}
void sleep()
{
for(int z = 0; z < 10000; z++) {}
}
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_ptr, unsigned * y_ptr, unsigned * z_ptr)
{
asm __volatile__("addi t5, sp, 0");
for (unsigned i = 1; i < num_Warps; i++)
for (unsigned i = 0; i < num_Warps; i++)
{
asm __volatile__("addi sp, sp, -2048");
wspawn(num_threads, i, func, x_ptr, y_ptr, z_ptr);
}
asm __volatile__("lui t6, 0xFFFF0");
asm __volatile__("add sp, sp, t6");
register unsigned stack_ptr asm("sp");
Job j;
j.wid = i;
j.n_threads = num_threads;
j.base_sp = stack_ptr;
j.func_ptr = (unsigned) func;
j.x = x_ptr;
j.y = y_ptr;
j.z = z_ptr;
queue_enqueue(&j);
}
asm __volatile__("addi sp, t5, 0");
createThreads(num_threads, 0, (unsigned) func, x_ptr, y_ptr, z_ptr);
schedule_warps();
ECALL;
sleep();
// asm __volatile__("addi t5, sp, 0");
// for (unsigned i = 1; i < num_Warps; i++)
// {
// asm __volatile__("addi sp, sp, -2048");
// wspawn(num_threads, i, func, x_ptr, y_ptr, z_ptr);
// }
// asm __volatile__("addi sp, t5, 0");
// createThreads(num_threads, 0, (unsigned) func, x_ptr, y_ptr, z_ptr);
}
// Returns the current value of `ret`, a local bound to a fixed register.
// NOTE(review): `ret` is bound twice below (s1, then s7) -- presumably the old
// and new lines of a diff; only one declaration can remain.
// NOTE(review): s7 is also the register get_1st_arg reads -- confirm that
// get_wid is really meant to share it.
unsigned get_wid()
{
register unsigned ret asm("s1");
register unsigned ret asm("s7");
return ret;
}
// Returns the pointer currently held in the register bound to `ret`.
// NOTE(review): `ret` is bound twice below (s2, then s7) -- presumably the old
// and new lines of a diff; only one declaration can remain.
// createThreads copies a3 into s7 ("mv s7, a3"), which looks like the
// producer for the s7 binding -- confirm.
unsigned * get_1st_arg(void)
{
register unsigned *ret asm("s2");
register unsigned *ret asm("s7");
return ret;
}
// Returns the pointer currently held in the register bound to `ret`.
// NOTE(review): `ret` is bound twice below (s3, then s8) -- presumably the old
// and new lines of a diff; only one declaration can remain.
// createThreads copies a4 into s8 ("mv s8, a4"), which looks like the
// producer for the s8 binding -- confirm.
unsigned * get_2nd_arg(void)
{
register unsigned *ret asm("s3");
register unsigned *ret asm("s8");
return ret;
}
// Returns the pointer currently held in the register bound to `ret`.
// NOTE(review): `ret` is bound twice below (s4, then s9) -- presumably the old
// and new lines of a diff; only one declaration can remain.
// createThreads copies a5 into s9 ("mv s9, a5"), which looks like the
// producer for the s9 binding -- confirm.
unsigned * get_3rd_arg(void)
{
register unsigned *ret asm("s4");
register unsigned *ret asm("s9");
return ret;
}
// Sets the stack pointer with a single lui: the immediate 0x7ffff is placed
// in bits 31:12 and the low 12 bits are zeroed, so sp becomes 0x7ffff000.
// NOTE(review): the statement appears twice, differing only in the empty
// operand/clobber lists -- presumably the old and new lines of a diff.
void initiate_stack()
{
asm __volatile__("lui sp,0x7ffff");
asm __volatile__("lui sp,0x7ffff":::);
}

View file

@ -2,16 +2,18 @@
#ifndef __RISCV_GP_
#define __RISCV_GP_
#include "queue.h"
#define WSPAWN asm __volatile__(".word 0x3006b"::);
#define CLONE asm __volatile__(".word 0x3506b":::"t1");
#define CLONE asm __volatile__(".word 0x3506b":::);
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"s10")
#define ECALL asm __volatile__(".word 0x00000073")
#define JMPRT asm __volatile__(".word 0x5406b")
#define FUNC void (func)(unsigned, unsigned)
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *);
void reschedule_warps(void);
unsigned get_wid();
unsigned * get_1st_arg(void);

View file

@ -1,90 +1,28 @@
#include "queue.h"
unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Exercises the job queue with a fixed enqueue/dequeue sequence and returns 0.
int main()
{
    // Template job; every enqueue below pushes a copy of this record.
    Job j;
    j.func_ptr = (unsigned) func;
    j.x = x;
    j.y = y;
    j.z = z;

    int step;

    // Six unconditional pushes.
    for (step = 0; step < 6; step++)
    {
        enqueue(j);
    }

    // Two pops, each overwriting the template with the popped job.
    for (step = 0; step < 2; step++)
    {
        j = dequeue();
    }

    // Two more unconditional pushes.
    for (step = 0; step < 2; step++)
    {
        enqueue(j);
    }

    // Six pushes that are skipped whenever the queue reports full.
    for (step = 0; step < 6; step++)
    {
        if (!isFull())
        {
            enqueue(j);
        }
    }

    // Ten pops whose results are discarded.
    for (step = 0; step < 10; step++)
    {
        dequeue();
    }

    return 0;
}
void initialize_queue(void)
// Resets the global queue `q`: start_i, end_i and num_j go to 0,
// total_warps is set to 7 and active_warps to 0.
void queue_initialize(void)
{
q.start_i = 0;
q.end_i = 0;
q.num_j = 0;
// NOTE(review): the three resets above are repeated below -- presumably the
// old and new lines of a diff; one copy can be dropped.
q.start_i = 0;
q.end_i = 0;
q.num_j = 0;
q.total_warps = 7;
q.active_warps = 0;
}
void enqueue(Job j)
void queue_enqueue(Job * j)
{
q.num_j++;
q.jobs[q.end_i] = j;
// q.jobs[q.end_i] = j;
q.jobs[q.end_i].wid = j->wid;
q.jobs[q.end_i].n_threads = j->n_threads;
q.jobs[q.end_i].base_sp = j->base_sp;
q.jobs[q.end_i].func_ptr = j->func_ptr;
q.jobs[q.end_i].x = j->x;
q.jobs[q.end_i].y = j->y;
q.jobs[q.end_i].z = j->z;
if ((q.end_i + 1) < SIZE)
{
q.end_i++;
@ -96,10 +34,10 @@ void enqueue(Job j)
}
Job dequeue(void)
void queue_dequeue(Job * r)
{
q.num_j--;
Job j = q.jobs[q.start_i];
Job * j = &(q.jobs[q.start_i]);
if ((q.start_i + 1) < SIZE)
{
q.start_i++;
@ -108,19 +46,28 @@ Job dequeue(void)
{
q.start_i = 0;
}
r->wid = j->wid;
r->n_threads = j->n_threads;
r->base_sp = j->base_sp;
r->func_ptr = j->func_ptr;
r->x = j->x;
r->y = j->y;
r->z = j->z;
}
int isFull(void)
// Reports whether the job queue is at capacity.
// Returns 1 when q.num_j equals SIZE, 0 otherwise.
int queue_isFull(void)
{
    if (q.num_j != SIZE)
    {
        return 0;
    }
    return 1;
}
int isEmpty(void)
// Reports whether the job queue holds no jobs.
// Returns 1 when q.num_j is zero, 0 otherwise.
int queue_isEmpty(void)
{
    return !q.num_j;
}
void func()
// Reports whether another warp may be started.
// Returns 1 while q.active_warps is below q.total_warps, 0 otherwise.
int queue_availableWarps()
{
    return q.total_warps > q.active_warps;
}

View file

@ -10,6 +10,9 @@
typedef struct Job_t
{
unsigned wid;
unsigned n_threads;
unsigned base_sp;
unsigned func_ptr;
unsigned * x;
unsigned * y;
@ -24,19 +27,22 @@ typedef struct Queue_t
unsigned start_i;
unsigned end_i;
unsigned num_j;
unsigned total_warps;
unsigned active_warps;
} Queue;
Queue q;
void initialize_queue(void);
void queue_initialize(void);
void enqueue(Job);
void queue_enqueue(Job *);
Job dequeue(void);
void queue_dequeue(Job *);
int isFull(void);
int isEmpty(void);
int queue_isFull(void);
int queue_isEmpty(void);
int queue_availableWarps();
void func();