Changed name to vortex for now + Fixed library structure

This commit is contained in:
felsabbagh3 2019-03-18 21:10:16 -04:00
parent 1ebd7a6969
commit 4266c8d86c
28 changed files with 1498 additions and 1329 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
src/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -1,18 +0,0 @@
# Cross-build of the gpgpu_test program for a 32-bit RISC-V target
# (rv32im, ilp32 ABI), linked statically with linker.ld and without the
# standard library or start files.
COMP = /opt/riscv/bin/riscv32-unknown-elf-gcc
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
DMP = /opt/riscv/bin/riscv32-unknown-elf-objdump
CPY = /opt/riscv/bin/riscv32-unknown-elf-objcopy

# None of these targets names a file that its recipe creates. Declaring them
# phony guarantees that a stray file called "all", "HEX", "DUMP" or "ELF" in
# this directory can never make the build silently do nothing.
.PHONY: all HEX DUMP ELF

# Default target: build the ELF, then derive the hex image and the listing.
all: HEX DUMP ELF

# Disassemble all sections of the ELF into gpgpu_test.dump.
DUMP: ELF
	$(DMP) -D gpgpu_test.elf > gpgpu_test.dump

# Convert the ELF into an Intel HEX image.
HEX: ELF
	$(CPY) -O ihex gpgpu_test.elf gpgpu_test.hex

# Compile and link the test program together with the support library sources.
ELF:
	$(COMP) $(CC_FLAGS) ./lib/lib.s gpgpu_test.c ./lib/queue.s ./lib/lib.c -o gpgpu_test.elf

View file

@ -1,115 +0,0 @@
// #include <stdint.h>
// #include <cstdint>
extern void print_consol(char *);
extern void printc(char);
int main(void);
void matMult (unsigned, unsigned);
#include "./lib/lib.h"
// unsigned x[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 , 1 , 1 , 1 , 1 , 1 };
// unsigned y[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
// unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/* Matrix edge length; the matrices are MAT_DIM x MAT_DIM. */
#define MAT_DIM 16
/* Thread count used for NUM_THREADS and to derive the per-thread column span. */
#define MAX_THREADS 8
/* Launch parameters handed to createWarps() by main(). */
#define NUM_WARPS MAT_DIM
#define NUM_THREADS MAX_THREADS

/*
 * Operand (x, y) and result (z) matrices, stored as flat arrays.
 * They are sized from MAT_DIM instead of a literal 256 so that changing
 * MAT_DIM can no longer make the MAT_DIM * MAT_DIM loops run past the end.
 */
unsigned x[MAT_DIM * MAT_DIM] = {0};
unsigned y[MAT_DIM * MAT_DIM] = {0};
unsigned z[MAT_DIM * MAT_DIM] = {0};
/*
 * Argument bundle consumed by matMult(): the three matrices plus the
 * dimension and the number of consecutive columns each thread handles.
 */
typedef struct
{
/* Left operand; matMult reads x[wid * mat_dim + place]. */
unsigned * x;
/* Right operand; matMult reads y[mat_dim * place + column]. */
unsigned * y;
/* Result matrix; matMult writes z[wid * mat_dim + column]. */
unsigned * z;
/* Row stride and length of each dot product. */
unsigned mat_dim;
/* Columns per thread; a thread starts at column offset * tid. */
unsigned offset;
} matMult_arg_t;
/* Single global instance; main() fills it in before calling createWarps(). */
matMult_arg_t args;
/*
 * Test driver for the matrix-multiply kernel.
 *
 * Initialises the library queues, fills x with 3 and y with 2, publishes the
 * kernel arguments through the global `args`, launches matMult via
 * createWarps(), waits for completion and finally prints z as MAT_DIM rows.
 * Always returns 0.
 */
int main()
{
    const int cell_count = MAT_DIM * MAT_DIM;
    int idx;

    /* Reset the eight queue slots of the library's `q`. */
    for (idx = 0; idx != 8; ++idx)
    {
        queue_initialize(&q[idx]);
    }

    /* Constant operands: every element of x is 3 and every element of y is 2. */
    for (idx = 0; idx < cell_count; ++idx)
    {
        x[idx] = 3;
        y[idx] = 2;
    }

    /* Each thread handles MAT_DIM / MAX_THREADS consecutive columns. */
    args.mat_dim = MAT_DIM;
    args.offset = (MAT_DIM / MAX_THREADS);
    args.x = x;
    args.y = y;
    args.z = z;

    createWarps(NUM_WARPS, NUM_THREADS, matMult, (void *) (&args));

    /*
     * NOTE(review): the createWarps/schedule_warps disassembly from this same
     * commit only cycles through queue indices 0..6, yet eight done flags are
     * polled here - confirm that flag 7 is ever set.
     */
    wait_for_done(8);

    print_consol("-------------------------\n");
    print_consol("FINAL Z\n");

    /* One matrix row per output line, elements separated by a space. */
    for (idx = 0; idx < cell_count; ++idx)
    {
        if ((idx % MAT_DIM) == 0)
        {
            print_consol("\n");
        }
        int_print(z[idx]);
        print_consol(" ");
    }

    print_consol("\n-------------------------------\n");
    return 0;
}
/*
 * Matrix-multiply kernel: computes part of one row of z = x * y.
 *
 * tid - selects the first column handled: column = offset * tid.
 * wid - selects the row of x and z that is processed.
 *
 * The argument block is fetched with get_1st_arg() (presumably the pointer
 * that main() handed to createWarps() - confirm against the library). The
 * matrices are indexed as flat arrays with a row stride of mat_dim. Exactly
 * `offset` consecutive entries of row `wid` are written to z.
 */
void matMult(unsigned tid, unsigned wid)
{
    matMult_arg_t * params = (matMult_arg_t *) get_1st_arg();
    unsigned * lhs = params->x;
    unsigned * rhs = params->y;
    unsigned * out = params->z;
    unsigned dim = params->mat_dim;
    unsigned cols_per_thread = params->offset;

    /* Start of row `wid` in the flat arrays; shared by x and z. */
    unsigned row_base = wid * dim;
    unsigned col = cols_per_thread * tid;
    unsigned remaining;

    for (remaining = cols_per_thread; remaining != 0; --remaining)
    {
        /* Dot product of row `wid` of x with column `col` of y. */
        unsigned acc = 0;
        unsigned k = 0;
        while (k < dim)
        {
            acc += lhs[row_base + k] * rhs[(dim * k) + col];
            ++k;
        }
        out[row_base + col] = acc;
        ++col;
    }
}

View file

@ -1,902 +0,0 @@
gpgpu_test.elf: file format elf32-littleriscv
Disassembly of section .text:
80000000 <_start>:
80000000: 7ffff137 lui sp,0x7ffff
80000004: 0c4000ef jal ra,800000c8 <main>
80000008: 00000073 ecall
8000000c <createThreads>:
8000000c: 00068b93 mv s7,a3
80000010: 00070d13 mv s10,a4
80000014: 00010f13 mv t5,sp
80000018: 00050393 mv t2,a0
8000001c <loop_init>:
8000001c: 00100513 li a0,1
80000020 <loop_cond>:
80000020: 00755c63 bge a0,t2,80000038 <loop_done>
80000024 <loop_body>:
80000024: 80010113 addi sp,sp,-2048 # 7fffe800 <SIZE+0x7fffe7ce>
80000028: 00050313 mv t1,a0
8000002c: 0003506b 0x3506b
80000030 <loop_inc>:
80000030: 00150513 addi a0,a0,1
80000034: fedff06f j 80000020 <loop_cond>
80000038 <loop_done>:
80000038: 000f0113 mv sp,t5
8000003c: 00000513 li a0,0
80000040: 00060f93 mv t6,a2
80000044: 00038d93 mv s11,t2
80000048: 01bfe0eb 0x1bfe0eb
8000004c: 00000517 auipc a0,0x0
80000050: 5b050513 addi a0,a0,1456 # 800005fc <reschedule_warps>
80000054: 0005406b 0x5406b
80000058 <printc>:
80000058: 000108b7 lui a7,0x10
8000005c: 00b8a023 sw a1,0(a7) # 10000 <SIZE+0xffce>
80000060: 00008067 ret
80000064 <wspawn>:
80000064: 00000317 auipc t1,0x0
80000068: fa830313 addi t1,t1,-88 # 8000000c <createThreads>
8000006c: 0003006b 0x3006b
80000070: 00008067 ret
80000074 <print_consol>:
80000074: ff410113 addi sp,sp,-12
80000078: 00112023 sw ra,0(sp)
8000007c: 00b12223 sw a1,4(sp)
80000080 <bl>:
80000080: 00054583 lbu a1,0(a0)
80000084: 00058863 beqz a1,80000094 <be>
80000088: fd1ff0ef jal ra,80000058 <printc>
8000008c: 00150513 addi a0,a0,1
80000090: ff1ff06f j 80000080 <bl>
80000094 <be>:
80000094: 00012083 lw ra,0(sp)
80000098: 00412583 lw a1,4(sp)
8000009c: 00c10113 addi sp,sp,12
800000a0: 00008067 ret
800000a4 <print_int>:
800000a4: ff410113 addi sp,sp,-12
800000a8: 00112023 sw ra,0(sp)
800000ac: 00b12223 sw a1,4(sp)
800000b0: 03050593 addi a1,a0,48
800000b4: fa5ff0ef jal ra,80000058 <printc>
800000b8: 00012083 lw ra,0(sp)
800000bc: 00412583 lw a1,4(sp)
800000c0: 00c10113 addi sp,sp,12
800000c4: 00008067 ret
800000c8 <main>:
800000c8: fe010113 addi sp,sp,-32
800000cc: 00112e23 sw ra,28(sp)
800000d0: 00812c23 sw s0,24(sp)
800000d4: 02010413 addi s0,sp,32
800000d8: fe042623 sw zero,-20(s0)
800000dc: 0300006f j 8000010c <main+0x44>
800000e0: fec42703 lw a4,-20(s0)
800000e4: 4c400793 li a5,1220
800000e8: 02f70733 mul a4,a4,a5
800000ec: 810017b7 lui a5,0x81001
800000f0: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
800000f4: 00f707b3 add a5,a4,a5
800000f8: 00078513 mv a0,a5
800000fc: 2f8000ef jal ra,800003f4 <queue_initialize>
80000100: fec42783 lw a5,-20(s0)
80000104: 00178793 addi a5,a5,1
80000108: fef42623 sw a5,-20(s0)
8000010c: fec42703 lw a4,-20(s0)
80000110: 00700793 li a5,7
80000114: fce7d6e3 bge a5,a4,800000e0 <main+0x18>
80000118: fe042423 sw zero,-24(s0)
8000011c: 0480006f j 80000164 <main+0x9c>
80000120: 810007b7 lui a5,0x81000
80000124: fe842703 lw a4,-24(s0)
80000128: 00271713 slli a4,a4,0x2
8000012c: 15478793 addi a5,a5,340 # 81000154 <main_sp+0xffffcdb8>
80000130: 00f707b3 add a5,a4,a5
80000134: 00300713 li a4,3
80000138: 00e7a023 sw a4,0(a5)
8000013c: 810007b7 lui a5,0x81000
80000140: fe842703 lw a4,-24(s0)
80000144: 00271713 slli a4,a4,0x2
80000148: 55478793 addi a5,a5,1364 # 81000554 <main_sp+0xffffd1b8>
8000014c: 00f707b3 add a5,a4,a5
80000150: 00200713 li a4,2
80000154: 00e7a023 sw a4,0(a5)
80000158: fe842783 lw a5,-24(s0)
8000015c: 00178793 addi a5,a5,1
80000160: fef42423 sw a5,-24(s0)
80000164: fe842703 lw a4,-24(s0)
80000168: 0ff00793 li a5,255
8000016c: fae7dae3 bge a5,a4,80000120 <main+0x58>
80000170: 810017b7 lui a5,0x81001
80000174: 81000737 lui a4,0x81000
80000178: 15470713 addi a4,a4,340 # 81000154 <main_sp+0xffffcdb8>
8000017c: d6e7a023 sw a4,-672(a5) # 81000d60 <main_sp+0xffffd9c4>
80000180: 810017b7 lui a5,0x81001
80000184: d6078793 addi a5,a5,-672 # 81000d60 <main_sp+0xffffd9c4>
80000188: 81000737 lui a4,0x81000
8000018c: 55470713 addi a4,a4,1364 # 81000554 <main_sp+0xffffd1b8>
80000190: 00e7a223 sw a4,4(a5)
80000194: 810017b7 lui a5,0x81001
80000198: d6078793 addi a5,a5,-672 # 81000d60 <main_sp+0xffffd9c4>
8000019c: 81001737 lui a4,0x81001
800001a0: 95470713 addi a4,a4,-1708 # 81000954 <main_sp+0xffffd5b8>
800001a4: 00e7a423 sw a4,8(a5)
800001a8: 810017b7 lui a5,0x81001
800001ac: d6078793 addi a5,a5,-672 # 81000d60 <main_sp+0xffffd9c4>
800001b0: 01000713 li a4,16
800001b4: 00e7a623 sw a4,12(a5)
800001b8: 810017b7 lui a5,0x81001
800001bc: d6078793 addi a5,a5,-672 # 81000d60 <main_sp+0xffffd9c4>
800001c0: 00200713 li a4,2
800001c4: 00e7a823 sw a4,16(a5)
800001c8: 810017b7 lui a5,0x81001
800001cc: d6078693 addi a3,a5,-672 # 81000d60 <main_sp+0xffffd9c4>
800001d0: 800007b7 lui a5,0x80000
800001d4: 28c78613 addi a2,a5,652 # 8000028c <main_sp+0xfeffcef0>
800001d8: 00800593 li a1,8
800001dc: 01000513 li a0,16
800001e0: 5dc000ef jal ra,800007bc <createWarps>
800001e4: 00800513 li a0,8
800001e8: 6b4000ef jal ra,8000089c <wait_for_done>
800001ec: 810007b7 lui a5,0x81000
800001f0: 04078513 addi a0,a5,64 # 81000040 <main_sp+0xffffcca4>
800001f4: e81ff0ef jal ra,80000074 <print_consol>
800001f8: 810007b7 lui a5,0x81000
800001fc: 05c78513 addi a0,a5,92 # 8100005c <main_sp+0xffffccc0>
80000200: e75ff0ef jal ra,80000074 <print_consol>
80000204: fe042223 sw zero,-28(s0)
80000208: 0540006f j 8000025c <main+0x194>
8000020c: fe442783 lw a5,-28(s0)
80000210: 00f7f793 andi a5,a5,15
80000214: 00079863 bnez a5,80000224 <main+0x15c>
80000218: 810007b7 lui a5,0x81000
8000021c: 06878513 addi a0,a5,104 # 81000068 <main_sp+0xffffcccc>
80000220: e55ff0ef jal ra,80000074 <print_consol>
80000224: 810017b7 lui a5,0x81001
80000228: fe442703 lw a4,-28(s0)
8000022c: 00271713 slli a4,a4,0x2
80000230: 95478793 addi a5,a5,-1708 # 81000954 <main_sp+0xffffd5b8>
80000234: 00f707b3 add a5,a4,a5
80000238: 0007a783 lw a5,0(a5)
8000023c: 00078513 mv a0,a5
80000240: 2f8000ef jal ra,80000538 <int_print>
80000244: 810007b7 lui a5,0x81000
80000248: 06c78513 addi a0,a5,108 # 8100006c <main_sp+0xffffccd0>
8000024c: e29ff0ef jal ra,80000074 <print_consol>
80000250: fe442783 lw a5,-28(s0)
80000254: 00178793 addi a5,a5,1
80000258: fef42223 sw a5,-28(s0)
8000025c: fe442703 lw a4,-28(s0)
80000260: 0ff00793 li a5,255
80000264: fae7d4e3 bge a5,a4,8000020c <main+0x144>
80000268: 810007b7 lui a5,0x81000
8000026c: 07078513 addi a0,a5,112 # 81000070 <main_sp+0xffffccd4>
80000270: e05ff0ef jal ra,80000074 <print_consol>
80000274: 00000793 li a5,0
80000278: 00078513 mv a0,a5
8000027c: 01c12083 lw ra,28(sp)
80000280: 01812403 lw s0,24(sp)
80000284: 02010113 addi sp,sp,32
80000288: 00008067 ret
8000028c <matMult>:
8000028c: fa010113 addi sp,sp,-96
80000290: 04112e23 sw ra,92(sp)
80000294: 04812c23 sw s0,88(sp)
80000298: 06010413 addi s0,sp,96
8000029c: faa42623 sw a0,-84(s0)
800002a0: fab42423 sw a1,-88(s0)
800002a4: 67c000ef jal ra,80000920 <get_1st_arg>
800002a8: fca42e23 sw a0,-36(s0)
800002ac: fdc42783 lw a5,-36(s0)
800002b0: 0007a783 lw a5,0(a5)
800002b4: fcf42c23 sw a5,-40(s0)
800002b8: fdc42783 lw a5,-36(s0)
800002bc: 0047a783 lw a5,4(a5)
800002c0: fcf42a23 sw a5,-44(s0)
800002c4: fdc42783 lw a5,-36(s0)
800002c8: 0087a783 lw a5,8(a5)
800002cc: fcf42823 sw a5,-48(s0)
800002d0: fdc42783 lw a5,-36(s0)
800002d4: 0107a783 lw a5,16(a5)
800002d8: fcf42623 sw a5,-52(s0)
800002dc: fcc42703 lw a4,-52(s0)
800002e0: fac42783 lw a5,-84(s0)
800002e4: 02f707b3 mul a5,a4,a5
800002e8: fef42623 sw a5,-20(s0)
800002ec: fdc42783 lw a5,-36(s0)
800002f0: 00c7a783 lw a5,12(a5)
800002f4: fcf42423 sw a5,-56(s0)
800002f8: fe042423 sw zero,-24(s0)
800002fc: 0d80006f j 800003d4 <matMult+0x148>
80000300: fe042223 sw zero,-28(s0)
80000304: fe042023 sw zero,-32(s0)
80000308: 0780006f j 80000380 <matMult+0xf4>
8000030c: fa842703 lw a4,-88(s0)
80000310: fc842783 lw a5,-56(s0)
80000314: 02f707b3 mul a5,a4,a5
80000318: fe042703 lw a4,-32(s0)
8000031c: 00f707b3 add a5,a4,a5
80000320: fcf42223 sw a5,-60(s0)
80000324: fc842703 lw a4,-56(s0)
80000328: fe042783 lw a5,-32(s0)
8000032c: 02f707b3 mul a5,a4,a5
80000330: fec42703 lw a4,-20(s0)
80000334: 00f707b3 add a5,a4,a5
80000338: fcf42023 sw a5,-64(s0)
8000033c: fc442783 lw a5,-60(s0)
80000340: 00279793 slli a5,a5,0x2
80000344: fd842703 lw a4,-40(s0)
80000348: 00f707b3 add a5,a4,a5
8000034c: 0007a703 lw a4,0(a5)
80000350: fc042783 lw a5,-64(s0)
80000354: 00279793 slli a5,a5,0x2
80000358: fd442683 lw a3,-44(s0)
8000035c: 00f687b3 add a5,a3,a5
80000360: 0007a783 lw a5,0(a5)
80000364: 02f707b3 mul a5,a4,a5
80000368: fe442703 lw a4,-28(s0)
8000036c: 00f707b3 add a5,a4,a5
80000370: fef42223 sw a5,-28(s0)
80000374: fe042783 lw a5,-32(s0)
80000378: 00178793 addi a5,a5,1
8000037c: fef42023 sw a5,-32(s0)
80000380: fe042703 lw a4,-32(s0)
80000384: fc842783 lw a5,-56(s0)
80000388: f8f762e3 bltu a4,a5,8000030c <matMult+0x80>
8000038c: fa842703 lw a4,-88(s0)
80000390: fc842783 lw a5,-56(s0)
80000394: 02f70733 mul a4,a4,a5
80000398: fec42783 lw a5,-20(s0)
8000039c: 00f707b3 add a5,a4,a5
800003a0: faf42e23 sw a5,-68(s0)
800003a4: fbc42783 lw a5,-68(s0)
800003a8: 00279793 slli a5,a5,0x2
800003ac: fd042703 lw a4,-48(s0)
800003b0: 00f707b3 add a5,a4,a5
800003b4: fe442703 lw a4,-28(s0)
800003b8: 00e7a023 sw a4,0(a5)
800003bc: fec42783 lw a5,-20(s0)
800003c0: 00178793 addi a5,a5,1
800003c4: fef42623 sw a5,-20(s0)
800003c8: fe842783 lw a5,-24(s0)
800003cc: 00178793 addi a5,a5,1
800003d0: fef42423 sw a5,-24(s0)
800003d4: fe842783 lw a5,-24(s0)
800003d8: fcc42703 lw a4,-52(s0)
800003dc: f2e7e2e3 bltu a5,a4,80000300 <matMult+0x74>
800003e0: 00000013 nop
800003e4: 05c12083 lw ra,92(sp)
800003e8: 05812403 lw s0,88(sp)
800003ec: 06010113 addi sp,sp,96
800003f0: 00008067 ret
800003f4 <queue_initialize>:
800003f4: 00050293 mv t0,a0
800003f8: 00000313 li t1,0
800003fc: 00700393 li t2,7
80000400: 0062a023 sw t1,0(t0)
80000404: 0062a223 sw t1,4(t0)
80000408: 0062a423 sw t1,8(t0)
8000040c: 0072a623 sw t2,12(t0)
80000410: 0062a823 sw t1,16(t0)
80000414: 00008067 ret
80000418 <queue_enqueue>:
80000418: 00050293 mv t0,a0
8000041c: 0082a303 lw t1,8(t0)
80000420: 00130313 addi t1,t1,1
80000424: 0062a423 sw t1,8(t0)
80000428: 01428313 addi t1,t0,20
8000042c: 0042ae83 lw t4,4(t0)
80000430: 005e9393 slli t2,t4,0x5
80000434: 00730333 add t1,t1,t2
80000438: 0005ae03 lw t3,0(a1)
8000043c: 01c32023 sw t3,0(t1)
80000440: 0045ae03 lw t3,4(a1)
80000444: 01c32223 sw t3,4(t1)
80000448: 0085ae03 lw t3,8(a1)
8000044c: 01c32423 sw t3,8(t1)
80000450: 00c5ae03 lw t3,12(a1)
80000454: 01c32623 sw t3,12(t1)
80000458: 0105ae03 lw t3,16(a1)
8000045c: 01c32823 sw t3,16(t1)
80000460: 0145ae03 lw t3,20(a1)
80000464: 01c32a23 sw t3,20(t1)
80000468: 001e8e93 addi t4,t4,1
8000046c: 03200f13 li t5,50
80000470: 01ee9463 bne t4,t5,80000478 <ec>
80000474: 00000e93 li t4,0
80000478 <ec>:
80000478: 01d2a223 sw t4,4(t0)
8000047c: 00008067 ret
80000480 <queue_dequeue>:
80000480: 00050293 mv t0,a0
80000484: 0082a303 lw t1,8(t0)
80000488: fff30313 addi t1,t1,-1
8000048c: 0062a423 sw t1,8(t0)
80000490: 01428313 addi t1,t0,20
80000494: 0002ae83 lw t4,0(t0)
80000498: 03200f93 li t6,50
8000049c: 000e8f13 mv t5,t4
800004a0: 001f0f13 addi t5,t5,1
800004a4: 01ff1463 bne t5,t6,800004ac <dc>
800004a8: 00000f13 li t5,0
800004ac <dc>:
800004ac: 01e2a023 sw t5,0(t0)
800004b0: 005e9393 slli t2,t4,0x5
800004b4: 00730333 add t1,t1,t2
800004b8: 00032e03 lw t3,0(t1)
800004bc: 01c5a023 sw t3,0(a1)
800004c0: 00432e03 lw t3,4(t1)
800004c4: 01c5a223 sw t3,4(a1)
800004c8: 00832e03 lw t3,8(t1)
800004cc: 01c5a423 sw t3,8(a1)
800004d0: 00c32e03 lw t3,12(t1)
800004d4: 01c5a623 sw t3,12(a1)
800004d8: 01032e03 lw t3,16(t1)
800004dc: 01c5a823 sw t3,16(a1)
800004e0: 01432e03 lw t3,20(t1)
800004e4: 01c5aa23 sw t3,20(a1)
800004e8: 00008067 ret
800004ec <queue_isFull>:
800004ec: 00050293 mv t0,a0
800004f0: 0082a303 lw t1,8(t0)
800004f4: 00000513 li a0,0
800004f8: 03200e13 li t3,50
800004fc: 006e1463 bne t3,t1,80000504 <qf>
80000500: 00150513 addi a0,a0,1
80000504 <qf>:
80000504: 00008067 ret
80000508 <queue_isEmpty>:
80000508: 00050293 mv t0,a0
8000050c: 0082a303 lw t1,8(t0)
80000510: 00000513 li a0,0
80000514: 00000e13 li t3,0
80000518: 006e1463 bne t3,t1,80000520 <qe>
8000051c: 00150513 addi a0,a0,1
80000520 <qe>:
80000520: 00008067 ret
80000524 <queue_availableWarps>:
80000524: 00050293 mv t0,a0
80000528: 00c2a303 lw t1,12(t0)
8000052c: 0102a383 lw t2,16(t0)
80000530: 0063b533 sltu a0,t2,t1
80000534: 00008067 ret
80000538 <int_print>:
80000538: fd010113 addi sp,sp,-48
8000053c: 02112623 sw ra,44(sp)
80000540: 02812423 sw s0,40(sp)
80000544: 03010413 addi s0,sp,48
80000548: fca42e23 sw a0,-36(s0)
8000054c: fdc42703 lw a4,-36(s0)
80000550: 00f00793 li a5,15
80000554: 02e7e463 bltu a5,a4,8000057c <int_print+0x44>
80000558: 810007b7 lui a5,0x81000
8000055c: fdc42703 lw a4,-36(s0)
80000560: 00271713 slli a4,a4,0x2
80000564: 11478793 addi a5,a5,276 # 81000114 <main_sp+0xffffcd78>
80000568: 00f707b3 add a5,a4,a5
8000056c: 0007a783 lw a5,0(a5)
80000570: 00078513 mv a0,a5
80000574: b01ff0ef jal ra,80000074 <print_consol>
80000578: 0740006f j 800005ec <int_print+0xb4>
8000057c: 02000793 li a5,32
80000580: fef42623 sw a5,-20(s0)
80000584: fe0405a3 sb zero,-21(s0)
80000588: fec42783 lw a5,-20(s0)
8000058c: ffc78793 addi a5,a5,-4
80000590: fdc42703 lw a4,-36(s0)
80000594: 00f757b3 srl a5,a4,a5
80000598: 00f7f793 andi a5,a5,15
8000059c: fef42223 sw a5,-28(s0)
800005a0: fe442783 lw a5,-28(s0)
800005a4: 00078663 beqz a5,800005b0 <int_print+0x78>
800005a8: 00100793 li a5,1
800005ac: fef405a3 sb a5,-21(s0)
800005b0: feb44783 lbu a5,-21(s0)
800005b4: 02078263 beqz a5,800005d8 <int_print+0xa0>
800005b8: 810007b7 lui a5,0x81000
800005bc: fe442703 lw a4,-28(s0)
800005c0: 00271713 slli a4,a4,0x2
800005c4: 11478793 addi a5,a5,276 # 81000114 <main_sp+0xffffcd78>
800005c8: 00f707b3 add a5,a4,a5
800005cc: 0007a783 lw a5,0(a5)
800005d0: 00078513 mv a0,a5
800005d4: aa1ff0ef jal ra,80000074 <print_consol>
800005d8: fec42783 lw a5,-20(s0)
800005dc: ffc78793 addi a5,a5,-4
800005e0: fef42623 sw a5,-20(s0)
800005e4: fec42783 lw a5,-20(s0)
800005e8: faf040e3 bgtz a5,80000588 <int_print+0x50>
800005ec: 02c12083 lw ra,44(sp)
800005f0: 02812403 lw s0,40(sp)
800005f4: 03010113 addi sp,sp,48
800005f8: 00008067 ret
800005fc <reschedule_warps>:
800005fc: fd010113 addi sp,sp,-48
80000600: 02112623 sw ra,44(sp)
80000604: 02812423 sw s0,40(sp)
80000608: 03a12223 sw s10,36(sp)
8000060c: 03010413 addi s0,sp,48
80000610: 000d0713 mv a4,s10
80000614: 4c400793 li a5,1220
80000618: 02f70733 mul a4,a4,a5
8000061c: 810017b7 lui a5,0x81001
80000620: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
80000624: 00f707b3 add a5,a4,a5
80000628: 00078513 mv a0,a5
8000062c: eddff0ef jal ra,80000508 <queue_isEmpty>
80000630: 00050793 mv a5,a0
80000634: 02078063 beqz a5,80000654 <reschedule_warps+0x58>
80000638: 000d0713 mv a4,s10
8000063c: 810037b7 lui a5,0x81003
80000640: 39478793 addi a5,a5,916 # 81003394 <main_sp+0xfffffff8>
80000644: 00f707b3 add a5,a4,a5
80000648: 00100713 li a4,1
8000064c: 00e78023 sb a4,0(a5)
80000650: 00000073 ecall
80000654: 000d0713 mv a4,s10
80000658: 4c400793 li a5,1220
8000065c: 02f70733 mul a4,a4,a5
80000660: 810017b7 lui a5,0x81001
80000664: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
80000668: 00f707b3 add a5,a4,a5
8000066c: fd840713 addi a4,s0,-40
80000670: 00070593 mv a1,a4
80000674: 00078513 mv a0,a5
80000678: e09ff0ef jal ra,80000480 <queue_dequeue>
8000067c: fe042783 lw a5,-32(s0)
80000680: 00078113 mv sp,a5
80000684: fdc42783 lw a5,-36(s0)
80000688: fd842583 lw a1,-40(s0)
8000068c: fe442603 lw a2,-28(s0)
80000690: fe842683 lw a3,-24(s0)
80000694: fec42703 lw a4,-20(s0)
80000698: 00078513 mv a0,a5
8000069c: 971ff0ef jal ra,8000000c <createThreads>
800006a0: 00000073 ecall
800006a4: 00000013 nop
800006a8: 02c12083 lw ra,44(sp)
800006ac: 02812403 lw s0,40(sp)
800006b0: 02412d03 lw s10,36(sp)
800006b4: 03010113 addi sp,sp,48
800006b8: 00008067 ret
800006bc <schedule_warps>:
800006bc: fd010113 addi sp,sp,-48
800006c0: 02112623 sw ra,44(sp)
800006c4: 02812423 sw s0,40(sp)
800006c8: 03010413 addi s0,sp,48
800006cc: 00010993 mv s3,sp
800006d0: fe042623 sw zero,-20(s0)
800006d4: 0840006f j 80000758 <schedule_warps+0x9c>
800006d8: fec42703 lw a4,-20(s0)
800006dc: 4c400793 li a5,1220
800006e0: 02f70733 mul a4,a4,a5
800006e4: 810017b7 lui a5,0x81001
800006e8: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
800006ec: 00f707b3 add a5,a4,a5
800006f0: 00078513 mv a0,a5
800006f4: e15ff0ef jal ra,80000508 <queue_isEmpty>
800006f8: 00050793 mv a5,a0
800006fc: 04079863 bnez a5,8000074c <schedule_warps+0x90>
80000700: fec42703 lw a4,-20(s0)
80000704: 4c400793 li a5,1220
80000708: 02f70733 mul a4,a4,a5
8000070c: 810017b7 lui a5,0x81001
80000710: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
80000714: 00f707b3 add a5,a4,a5
80000718: fd440713 addi a4,s0,-44
8000071c: 00070593 mv a1,a4
80000720: 00078513 mv a0,a5
80000724: d5dff0ef jal ra,80000480 <queue_dequeue>
80000728: fdc42783 lw a5,-36(s0)
8000072c: 00078113 mv sp,a5
80000730: fd842783 lw a5,-40(s0)
80000734: fd442583 lw a1,-44(s0)
80000738: fe042603 lw a2,-32(s0)
8000073c: fe442683 lw a3,-28(s0)
80000740: fe842703 lw a4,-24(s0)
80000744: 00078513 mv a0,a5
80000748: 91dff0ef jal ra,80000064 <wspawn>
8000074c: fec42783 lw a5,-20(s0)
80000750: 00178793 addi a5,a5,1
80000754: fef42623 sw a5,-20(s0)
80000758: fec42703 lw a4,-20(s0)
8000075c: 00600793 li a5,6
80000760: f6e7dce3 bge a5,a4,800006d8 <schedule_warps+0x1c>
80000764: 00098113 mv sp,s3
80000768: 00000013 nop
8000076c: 02c12083 lw ra,44(sp)
80000770: 02812403 lw s0,40(sp)
80000774: 03010113 addi sp,sp,48
80000778: 00008067 ret
8000077c <sleep>:
8000077c: fd010113 addi sp,sp,-48
80000780: 02812623 sw s0,44(sp)
80000784: 03010413 addi s0,sp,48
80000788: fca42e23 sw a0,-36(s0)
8000078c: fe042623 sw zero,-20(s0)
80000790: 0100006f j 800007a0 <sleep+0x24>
80000794: fec42783 lw a5,-20(s0)
80000798: 00178793 addi a5,a5,1
8000079c: fef42623 sw a5,-20(s0)
800007a0: fec42703 lw a4,-20(s0)
800007a4: fdc42783 lw a5,-36(s0)
800007a8: fef746e3 blt a4,a5,80000794 <sleep+0x18>
800007ac: 00000013 nop
800007b0: 02c12403 lw s0,44(sp)
800007b4: 03010113 addi sp,sp,48
800007b8: 00008067 ret
800007bc <createWarps>:
800007bc: fc010113 addi sp,sp,-64
800007c0: 02112e23 sw ra,60(sp)
800007c4: 02812c23 sw s0,56(sp)
800007c8: 04010413 addi s0,sp,64
800007cc: fca42623 sw a0,-52(s0)
800007d0: fcb42423 sw a1,-56(s0)
800007d4: fcc42223 sw a2,-60(s0)
800007d8: fcd42023 sw a3,-64(s0)
800007dc: 00010913 mv s2,sp
800007e0: fe042623 sw zero,-20(s0)
800007e4: fe042423 sw zero,-24(s0)
800007e8: 08c0006f j 80000874 <createWarps+0xb8>
800007ec: ffff09b7 lui s3,0xffff0
800007f0: 01310133 add sp,sp,s3
800007f4: fe842783 lw a5,-24(s0)
800007f8: fcf42823 sw a5,-48(s0)
800007fc: fc842783 lw a5,-56(s0)
80000800: fcf42a23 sw a5,-44(s0)
80000804: 00010793 mv a5,sp
80000808: fcf42c23 sw a5,-40(s0)
8000080c: fc442783 lw a5,-60(s0)
80000810: fcf42e23 sw a5,-36(s0)
80000814: fc042783 lw a5,-64(s0)
80000818: fef42023 sw a5,-32(s0)
8000081c: fec42783 lw a5,-20(s0)
80000820: fef42223 sw a5,-28(s0)
80000824: fec42703 lw a4,-20(s0)
80000828: 4c400793 li a5,1220
8000082c: 02f70733 mul a4,a4,a5
80000830: 810017b7 lui a5,0x81001
80000834: d7478793 addi a5,a5,-652 # 81000d74 <main_sp+0xffffd9d8>
80000838: 00f707b3 add a5,a4,a5
8000083c: fd040713 addi a4,s0,-48
80000840: 00070593 mv a1,a4
80000844: 00078513 mv a0,a5
80000848: bd1ff0ef jal ra,80000418 <queue_enqueue>
8000084c: fec42783 lw a5,-20(s0)
80000850: 00178793 addi a5,a5,1
80000854: fef42623 sw a5,-20(s0)
80000858: fec42703 lw a4,-20(s0)
8000085c: 00600793 li a5,6
80000860: 00e7d463 bge a5,a4,80000868 <createWarps+0xac>
80000864: fe042623 sw zero,-20(s0)
80000868: fe842783 lw a5,-24(s0)
8000086c: 00178793 addi a5,a5,1
80000870: fef42423 sw a5,-24(s0)
80000874: fe842703 lw a4,-24(s0)
80000878: fcc42783 lw a5,-52(s0)
8000087c: f6f768e3 bltu a4,a5,800007ec <createWarps+0x30>
80000880: 00090113 mv sp,s2
80000884: e39ff0ef jal ra,800006bc <schedule_warps>
80000888: 00000013 nop
8000088c: 03c12083 lw ra,60(sp)
80000890: 03812403 lw s0,56(sp)
80000894: 04010113 addi sp,sp,64
80000898: 00008067 ret
8000089c <wait_for_done>:
8000089c: fd010113 addi sp,sp,-48
800008a0: 02812623 sw s0,44(sp)
800008a4: 03010413 addi s0,sp,48
800008a8: fca42e23 sw a0,-36(s0)
800008ac: fe0407a3 sb zero,-17(s0)
800008b0: 0500006f j 80000900 <wait_for_done+0x64>
800008b4: 00100793 li a5,1
800008b8: fef407a3 sb a5,-17(s0)
800008bc: fe042423 sw zero,-24(s0)
800008c0: 0340006f j 800008f4 <wait_for_done+0x58>
800008c4: fef44783 lbu a5,-17(s0)
800008c8: 81003737 lui a4,0x81003
800008cc: 39470693 addi a3,a4,916 # 81003394 <main_sp+0xfffffff8>
800008d0: fe842703 lw a4,-24(s0)
800008d4: 00e68733 add a4,a3,a4
800008d8: 00074703 lbu a4,0(a4)
800008dc: 00e7f7b3 and a5,a5,a4
800008e0: 00f037b3 snez a5,a5
800008e4: fef407a3 sb a5,-17(s0)
800008e8: fe842783 lw a5,-24(s0)
800008ec: 00178793 addi a5,a5,1
800008f0: fef42423 sw a5,-24(s0)
800008f4: fe842783 lw a5,-24(s0)
800008f8: fdc42703 lw a4,-36(s0)
800008fc: fce7e4e3 bltu a5,a4,800008c4 <wait_for_done+0x28>
80000900: fef44783 lbu a5,-17(s0)
80000904: 0017c793 xori a5,a5,1
80000908: 0ff7f793 andi a5,a5,255
8000090c: fa0794e3 bnez a5,800008b4 <wait_for_done+0x18>
80000910: 00000013 nop
80000914: 02c12403 lw s0,44(sp)
80000918: 03010113 addi sp,sp,48
8000091c: 00008067 ret
80000920 <get_1st_arg>:
80000920: ff010113 addi sp,sp,-16
80000924: 00812623 sw s0,12(sp)
80000928: 01712423 sw s7,8(sp)
8000092c: 01010413 addi s0,sp,16
80000930: 000b8793 mv a5,s7
80000934: 00078513 mv a0,a5
80000938: 00c12403 lw s0,12(sp)
8000093c: 00812b83 lw s7,8(sp)
80000940: 01010113 addi sp,sp,16
80000944: 00008067 ret
80000948 <get_2nd_arg>:
80000948: ff010113 addi sp,sp,-16
8000094c: 00812623 sw s0,12(sp)
80000950: 01812423 sw s8,8(sp)
80000954: 01010413 addi s0,sp,16
80000958: 000c0793 mv a5,s8
8000095c: 00078513 mv a0,a5
80000960: 00c12403 lw s0,12(sp)
80000964: 00812c03 lw s8,8(sp)
80000968: 01010113 addi sp,sp,16
8000096c: 00008067 ret
80000970 <get_3rd_arg>:
80000970: ff010113 addi sp,sp,-16
80000974: 00812623 sw s0,12(sp)
80000978: 01912423 sw s9,8(sp)
8000097c: 01010413 addi s0,sp,16
80000980: 000c8793 mv a5,s9
80000984: 00078513 mv a0,a5
80000988: 00c12403 lw s0,12(sp)
8000098c: 00812c83 lw s9,8(sp)
80000990: 01010113 addi sp,sp,16
80000994: 00008067 ret
Disassembly of section .rodata:
81000000 <.rodata>:
81000000: 0030 addi a2,sp,8
81000002: 0000 unimp
81000004: 0031 c.nop 12
81000006: 0000 unimp
81000008: 0032 c.slli zero,0xc
8100000a: 0000 unimp
8100000c: 00000033 add zero,zero,zero
81000010: 0034 addi a3,sp,8
81000012: 0000 unimp
81000014: 0035 c.nop 13
81000016: 0000 unimp
81000018: 0036 c.slli zero,0xd
8100001a: 0000 unimp
8100001c: 00000037 lui zero,0x0
81000020: 0038 addi a4,sp,8
81000022: 0000 unimp
81000024: 0039 c.nop 14
81000026: 0000 unimp
81000028: 0061 c.nop 24
8100002a: 0000 unimp
8100002c: 0062 c.slli zero,0x18
8100002e: 0000 unimp
81000030: 00000063 beqz zero,81000030 <get_3rd_arg+0xfff6c0>
81000034: 0064 addi s1,sp,12
81000036: 0000 unimp
81000038: 0065 c.nop 25
8100003a: 0000 unimp
8100003c: 0066 c.slli zero,0x19
8100003e: 0000 unimp
81000040: 2d2d jal 8100067a <y+0x126>
81000042: 2d2d jal 8100067c <y+0x128>
81000044: 2d2d jal 8100067e <y+0x12a>
81000046: 2d2d jal 81000680 <y+0x12c>
81000048: 2d2d jal 81000682 <y+0x12e>
8100004a: 2d2d jal 81000684 <y+0x130>
8100004c: 2d2d jal 81000686 <y+0x132>
8100004e: 2d2d jal 81000688 <y+0x134>
81000050: 2d2d jal 8100068a <y+0x136>
81000052: 2d2d jal 8100068c <y+0x138>
81000054: 2d2d jal 8100068e <y+0x13a>
81000056: 2d2d jal 81000690 <y+0x13c>
81000058: 0a2d addi s4,s4,11
8100005a: 0000 unimp
8100005c: 4946 lw s2,80(sp)
8100005e: 414e lw sp,208(sp)
81000060: 204c fld fa1,128(s0)
81000062: 0a5a slli s4,s4,0x16
81000064: 0000 unimp
81000066: 0000 unimp
81000068: 000a c.slli zero,0x2
8100006a: 0000 unimp
8100006c: 0020 addi s0,sp,8
8100006e: 0000 unimp
81000070: 2d0a fld fs10,128(sp)
81000072: 2d2d jal 810006ac <y+0x158>
81000074: 2d2d jal 810006ae <y+0x15a>
81000076: 2d2d jal 810006b0 <y+0x15c>
81000078: 2d2d jal 810006b2 <y+0x15e>
8100007a: 2d2d jal 810006b4 <y+0x160>
8100007c: 2d2d jal 810006b6 <y+0x162>
8100007e: 2d2d jal 810006b8 <y+0x164>
81000080: 2d2d jal 810006ba <y+0x166>
81000082: 2d2d jal 810006bc <y+0x168>
81000084: 2d2d jal 810006be <y+0x16a>
81000086: 2d2d jal 810006c0 <y+0x16c>
81000088: 2d2d jal 810006c2 <y+0x16e>
8100008a: 2d2d jal 810006c4 <y+0x170>
8100008c: 2d2d jal 810006c6 <y+0x172>
8100008e: 2d2d jal 810006c8 <y+0x174>
81000090: 000a c.slli zero,0x2
81000092: 0000 unimp
81000094: 0030 addi a2,sp,8
81000096: 0000 unimp
81000098: 0031 c.nop 12
8100009a: 0000 unimp
8100009c: 0032 c.slli zero,0xc
8100009e: 0000 unimp
810000a0: 00000033 add zero,zero,zero
810000a4: 0034 addi a3,sp,8
810000a6: 0000 unimp
810000a8: 0035 c.nop 13
810000aa: 0000 unimp
810000ac: 0036 c.slli zero,0xd
810000ae: 0000 unimp
810000b0: 00000037 lui zero,0x0
810000b4: 0038 addi a4,sp,8
810000b6: 0000 unimp
810000b8: 0039 c.nop 14
810000ba: 0000 unimp
810000bc: 0061 c.nop 24
810000be: 0000 unimp
810000c0: 0062 c.slli zero,0x18
810000c2: 0000 unimp
810000c4: 00000063 beqz zero,810000c4 <get_3rd_arg+0xfff754>
810000c8: 0064 addi s1,sp,12
810000ca: 0000 unimp
810000cc: 0065 c.nop 25
810000ce: 0000 unimp
810000d0: 0066 c.slli zero,0x19
Disassembly of section .data:
810000d4 <hextoa>:
810000d4: 0000 unimp
810000d6: 8100 0x8100
810000d8: 0004 0x4
810000da: 8100 0x8100
810000dc: 0008 0x8
810000de: 8100 0x8100
810000e0: 000c 0xc
810000e2: 8100 0x8100
810000e4: 0010 0x10
810000e6: 8100 0x8100
810000e8: 0014 0x14
810000ea: 8100 0x8100
810000ec: 0018 0x18
810000ee: 8100 0x8100
810000f0: 001c 0x1c
810000f2: 8100 0x8100
810000f4: 0020 addi s0,sp,8
810000f6: 8100 0x8100
810000f8: 0024 addi s1,sp,8
810000fa: 8100 0x8100
810000fc: 0028 addi a0,sp,8
810000fe: 8100 0x8100
81000100: 002c addi a1,sp,8
81000102: 8100 0x8100
81000104: 0030 addi a2,sp,8
81000106: 8100 0x8100
81000108: 0034 addi a3,sp,8
8100010a: 8100 0x8100
8100010c: 0038 addi a4,sp,8
8100010e: 8100 0x8100
81000110: 003c addi a5,sp,8
81000112: 8100 0x8100
81000114 <hextoa>:
81000114: 0094 addi a3,sp,64
81000116: 8100 0x8100
81000118: 0098 addi a4,sp,64
8100011a: 8100 0x8100
8100011c: 009c addi a5,sp,64
8100011e: 8100 0x8100
81000120: 00a0 addi s0,sp,72
81000122: 8100 0x8100
81000124: 00a4 addi s1,sp,72
81000126: 8100 0x8100
81000128: 00a8 addi a0,sp,72
8100012a: 8100 0x8100
8100012c: 00ac addi a1,sp,72
8100012e: 8100 0x8100
81000130: 00b0 addi a2,sp,72
81000132: 8100 0x8100
81000134: 00b4 addi a3,sp,72
81000136: 8100 0x8100
81000138: 00b8 addi a4,sp,72
8100013a: 8100 0x8100
8100013c: 00bc addi a5,sp,72
8100013e: 8100 0x8100
81000140: 00c0 addi s0,sp,68
81000142: 8100 0x8100
81000144: 00c4 addi s1,sp,68
81000146: 8100 0x8100
81000148: 00c8 addi a0,sp,68
8100014a: 8100 0x8100
8100014c: 00cc addi a1,sp,68
8100014e: 8100 0x8100
81000150: 00d0 addi a2,sp,68
81000152: 8100 0x8100
Disassembly of section .bss:
81000154 <x>:
...
81000554 <y>:
...
81000954 <z>:
...
81000d54 <done>:
...
81000d5c <main_sp>:
81000d5c: 0000 unimp
...
81000d60 <args>:
...
81000d74 <q>:
...
81003394 <done>:
...
8100339c <main_sp>:
8100339c: 0000 unimp
...
Disassembly of section .comment:
82000000 <.comment>:
82000000: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
82000004: 2820 fld fs0,80(s0)
82000006: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
8200000a: 3820 fld fs0,112(s0)
8200000c: 322e fld ft4,232(sp)
8200000e: 302e fld ft0,232(sp)
...

View file

@ -1,180 +0,0 @@
:0200000480007A
:1000000037F1FF7FEF00400C73000000938B060078
:10001000130D0700130F01009303050013051000D3
:10002000635C750013010180130305006B5003002E
:10003000130515006FF0DFFE13010F00130500001C
:10004000930F0600938D0300EBE0BF01170500003E
:100050001305055B6B400500B708010023A0B8003D
:100060006780000017030000130383FA6B0003008E
:1000700067800000130141FF232011002322B100FB
:100080008345050063880500EFF01FFD130515008B
:100090006FF01FFF83200100832541001301C10081
:1000A00067800000130141FF232011002322B100CB
:1000B00093050503EFF05FFA8320010083254100DB
:1000C0001301C10067800000130101FE232E1100FF
:1000D000232C810013040102232604FE6F00000379
:1000E0000327C4FE9307404C3307F702B71700817C
:1000F000938747D7B307F70013850700EF00802FDA
:100100008327C4FE938717002326F4FE0327C4FE2B
:1001100093077000E3D6E7FC232404FE6F008004FD
:10012000B7070081032784FE13172700938747151D
:10013000B307F7001307300023A0E700B7070081DB
:10014000032784FE1317270093874755B307F7004B
:100150001307200023A0E700832784FE938717005E
:100160002324F4FE032784FE9307F00FE3DAE7FA73
:10017000B7170081370700811307471523A0E7D67B
:10018000B7170081938707D63707008113074755B4
:1001900023A2E700B7170081938707D6371700819E
:1001A0001307479523A4E700B7170081938707D665
:1001B0001307000123A6E700B7170081938707D62E
:1001C0001307200023A8E700B7170081938607D6FE
:1001D000B70700801386C728930580001305000128
:1001E000EF00C05D13058000EF00406BB707008192
:1001F00013850704EFF01FE8B70700811385C705D3
:10020000EFF05FE7232204FE6F004005832744FEE2
:1002100093F7F70063980700B707008113858706F7
:10022000EFF05FE5B7170081032744FE131727009F
:1002300093874795B307F70083A707001385070047
:10024000EF00802FB70700811385C706EFF09FE20C
:10025000832744FE938717002322F4FE032744FEDE
:100260009307F00FE3D4E7FAB70700811385070778
:10027000EFF05FE093070000138507008320C101C2
:10028000032481011301010267800000130101FAB8
:10029000232E1104232C8104130401062326A4FA1F
:1002A0002324B4FAEF00C067232EA4FC8327C4FDE7
:1002B00083A70700232CF4FC8327C4FD83A74700F2
:1002C000232AF4FC8327C4FD83A787002328F4FC9A
:1002D0008327C4FD83A707012326F4FC0327C4FC5E
:1002E0008327C4FAB307F7022326F4FE8327C4FD4D
:1002F00083A7C7002324F4FC232404FE6F00800D91
:10030000232204FE232004FE6F008007032784FAC3
:10031000832784FCB307F702032704FEB307F70023
:100320002322F4FC032784FC832704FEB307F7028F
:100330000327C4FEB307F7002320F4FC832744FC03
:1003400093972700032784FDB307F70003A707004F
:10035000832704FC93972700832644FDB387F60088
:1003600083A70700B307F702032744FEB307F7008C
:100370002322F4FE832704FE938717002320F4FE34
:10038000032704FE832784FCE362F7F8032784FA3B
:10039000832784FC3307F7028327C4FEB307F700E3
:1003A000232EF4FA8327C4FB93972700032704FD29
:1003B000B307F700032744FE23A0E7008327C4FE0A
:1003C000938717002326F4FE832784FE9387170064
:1003D0002324F4FE832784FE0327C4FCE3E2E7F230
:1003E000130000008320C1050324810513010106C9
:1003F0006780000093020500130300009303700060
:1004000023A0620023A2620023A4620023A672003C
:1004100023A86200678000009302050003A3820006
:100420001303130023A462001383420183AE42002E
:1004300093935E003303730003AE05002320C301D2
:1004400003AE45002322C30103AE85002324C3016C
:1004500003AEC5002326C30103AE05012328C30153
:1004600003AE4501232AC301938E1E00130F200300
:100470006394EE01930E000023A2D2016780000076
:100480009302050003A382001303F3FF23A4620079
:100490001383420183AE0200930F2003138F0E00DB
:1004A000130F1F006314FF01130F000023A0E201CC
:1004B00093935E0033037300032E030023A0C50152
:1004C000032E430023A2C501032E830023A4C501EC
:1004D000032EC30023A6C501032E030123A8C501D3
:1004E000032E430123AAC501678000009302050083
:1004F00003A3820013050000130E200363146E0093
:1005000013051500678000009302050003A3820015
:1005100013050000130E000063146E001305150090
:10052000678000009302050003A3C20083A30201B9
:1005300033B5630067800000130101FD232611021B
:100540002324810213040103232EA4FC0327C4FDEA
:100550009307F00063E4E702B70700810327C4FDB7
:100560001317270093874711B307F70083A70700E6
:1005700013850700EFF01FB06F00400793070002DC
:100580002326F4FEA30504FE8327C4FE9387C7FF3A
:100590000327C4FDB357F70093F7F7002322F4FEB7
:1005A000832744FE6386070093071000A305F4FE2B
:1005B0008347B4FE63820702B7070081032744FE26
:1005C0001317270093874711B307F70083A7070086
:1005D00013850700EFF01FAA8327C4FE9387C7FF88
:1005E0002326F4FE8327C4FEE340F0FA8320C102F1
:1005F000032481021301010367800000130101FD40
:1006000023261102232481022322A10313040103C0
:1006100013070D009307404C3307F702B71700810B
:10062000938747D7B307F70013850700EFF0DFED97
:10063000930705006380070213070D00B737008199
:1006400093874739B307F700130710002380E700AB
:100650007300000013070D009307404C3307F702A7
:10066000B7170081938747D7B307F700130784FDB7
:100670009305070013850700EFF09FE0832704FE32
:10068000138107008327C4FD832584FD032644FED0
:10069000832684FE0327C4FE13850700EFF01F970F
:1006A00073000000130000008320C10203248102B4
:1006B000032D41021301010367800000130101FDB6
:1006C000232611022324810213040103930901004C
:1006D000232604FE6F0040080327C4FE9307404C06
:1006E0003307F702B7170081938747D7B307F7009F
:1006F00013850700EFF05FE1930705006398070497
:100700000327C4FE9307404C3307F702B717008155
:10071000938747D7B307F700130744FD93050700F6
:1007200013850700EFF0DFD58327C4FD1381070091
:10073000832784FD832544FD032604FE832644FE8F
:10074000032784FE13850700EFF0DF918327C4FEA3
:10075000938717002326F4FE0327C4FE9307600047
:10076000E3DCE7F613810900130000008320C102D7
:10077000032481021301010367800000130101FDBE
:100780002326810213040103232EA4FC232604FE46
:100790006F0000018327C4FE938717002326F4FE11
:1007A0000327C4FE8327C4FDE346F7FE13000000C1
:1007B0000324C1021301010367800000130101FC3F
:1007C000232E1102232C8102130401042326A4FCEE
:1007D0002324B4FC2322C4FC2320D4FC13090100ED
:1007E000232604FE232404FE6F00C008B709FFFF80
:1007F00033013101832784FE2328F4FC832784FC02
:10080000232AF4FC93070100232CF4FC832744FCE7
:10081000232EF4FC832704FC2320F4FE8327C4FE4C
:100820002322F4FE0327C4FE9307404C3307F7024C
:10083000B7170081938747D7B307F700130704FD65
:100840009305070013850700EFF01FBD8327C4FE43
:10085000938717002326F4FE0327C4FE9307600046
:1008600063D4E700232604FE832784FE93871700C2
:100870002324F4FE032784FE8327C4FCE368F7F6F1
:1008800013010900EFF09FE3130000008320C10370
:10089000032481031301010467800000130101FD9B
:1008A0002326810213040103232EA4FCA30704FEC4
:1008B0006F00000593071000A307F4FE232404FE35
:1008C0006F0040038347F4FE3737008193064739B2
:1008D000032784FE3387E60003470700B3F7E700EA
:1008E000B337F000A307F4FE832784FE9387170035
:1008F0002324F4FE832784FE0327C4FDE3E4E7FCFE
:100900008347F4FE93C7170093F7F70FE39407FAB2
:10091000130000000324C1021301010367800000DB
:10092000130101FF23268100232471011304010117
:1009300093870B00138507000324C100832B8100DC
:100940001301010167800000130101FF23268100CC
:10095000232481011304010193070C001385070070
:100960000324C100032C81001301010167800000F2
:10097000130101FF232681002324910113040101A7
:1009800093870C00138507000324C100832C81008A
:08099000130101016780000062
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
:10002000380000003900000061000000620000009C
:10003000630000006400000065000000660000002E
:100040002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DE0
:100050002D2D2D2D2D2D2D2D2D0A000046494E41E3
:100060004C205A0A000000000A0000002000000096
:100070000A2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DD3
:100080002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DA0
:100090000A000000300000003100000032000000C3
:1000A000330000003400000035000000360000007E
:1000B0003700000038000000390000006100000037
:1000C00062000000630000006400000065000000A2
:0200D0006600C8
:1000D4000000008104000081080000810C00008100
:1000E4001000008114000081180000811C000081B0
:1000F4002000008124000081280000812C00008160
:100104003000008134000081380000813C0000810F
:1001140094000081980000819C000081A00000816F
:10012400A4000081A8000081AC000081B00000811F
:10013400B4000081B8000081BC000081C0000081CF
:10014400C4000081C8000081CC000081D00000817F
:040000058000000077
:00000001FF

View file

@ -1,3 +0,0 @@
/opt/riscv/bin/riscv32-unknown-elf-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib ./lib/lib.s gpgpu_test.c ./lib/queue.s ./lib/lib.c -o gpgpu_test.elf
/opt/riscv/bin/riscv32-unknown-elf-objdump -D gpgpu_test.elf > gpgpu_test.dump
/opt/riscv/bin/riscv32-unknown-elf-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex

View file

@ -1,4 +1,4 @@
echo start > results.txt
echo ./riscv_gpgpu/gpgpu_test.hex
./harptool -E -a rv32i --core ./riscv_gpgpu/gpgpu_test.hex -s -b
echo ./vortex_software/vortex_test.hex
./harptool -E -a rv32i --core ./vortex_software/vortex_test.hex -s -b

BIN
src/riscv_gpgpu/lib/queue.elf → src/vortex_software/.DS_Store vendored Executable file → Normal file

Binary file not shown.

View file

@ -0,0 +1,21 @@
# Cross toolchain binaries (riscv32-unknown-elf GCC, objdump, objcopy) under /opt/riscv/bin.
COMP = /opt/riscv/bin/riscv32-unknown-elf-gcc
# Compile and link flags: RV32IM / ilp32, no optimisation, static link with the
# local linker.ld script, freestanding build without the standard library.
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
DMP = /opt/riscv/bin/riscv32-unknown-elf-objdump
CPY = /opt/riscv/bin/riscv32-unknown-elf-objcopy
# Source groups that are compiled together with vx_main.c by the ELF rule.
VX_LIB = ./vx_os/vx_back/vx_back.s ./vx_os/vx_back/vx_back.c ./vx_os/vx_util/queue.s
VX_IO = ./vx_os/vx_io/vx_io.s ./vx_os/vx_io/vx_io.c
VX_FR = ./vx_include/vx_front.c
# Default goal: produce the hex image, the disassembly dump and the ELF.
# NOTE(review): none of the target names (all, HEX, DUMP, ELF) is a file the
# recipes create and none is declared .PHONY, so presumably every invocation
# re-runs all recipes -- confirm this is intended.
all: HEX DUMP ELF
# Full disassembly (-D) of the ELF into vortex_test.dump.
DUMP: ELF
$(DMP) -D vortex_test.elf > vortex_test.dump
# Convert the ELF to Intel HEX (vortex_test.hex).
HEX: ELF
$(CPY) -O ihex vortex_test.elf vortex_test.hex
# Compile and link every source group plus vx_main.c in a single compiler invocation.
ELF:
$(COMP) $(CC_FLAGS) $(VX_LIB) $(VX_IO) $(VX_FR) vx_main.c -o vortex_test.elf

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,202 @@
:0200000480007A
:1000000037F1FF7FEF008006EF005018730000000B
:10001000938B0600130D0700130F010093030500D7
:1000200013051000635C75001301018013030500C4
:100030006B500300130515006FF0DFFE13010F0076
:1000400013050000930F0600938D0300EBE0BF0142
:1000500017050000130505086B4005001703000095
:10006000130343FB6B00030067800000130101FED4
:10007000232E1100232C810013040102232604FEE9
:100080006F0000030327C4FE9307404C3307F702B9
:10009000B70700819387071FB307F7001385070091
:1000A000EF00C0338327C4FE938717002326F4FE96
:1000B0000327C4FE93077000E3D6E7FC130000009B
:1000C0008320C10103248101130101026780000024
:1000D000130101FD23261102232481022322A103FF
:1000E0001304010313070D009307404C3307F70275
:1000F000B70700819387071FB307F7001385070031
:10010000EF00003F930705006380070213070D000F
:10011000B70700819387471EB307F7001307100046
:100120002380E7007300000013070D009307404C85
:100130003307F702B70700819387071FB307F7005C
:10014000130784FD9305070013850700EF00C031F6
:10015000832704FE138107008327C4FD832584FDC4
:10016000032644FE832684FE0327C4FE138507006E
:10017000EFF01FEA73000000130000008320C102AB
:1001800003248102032D4102130101036780000053
:10019000130101FD2326110223248102130401030C
:1001A00093090100232604FE6F0040080327C4FEC4
:1001B0009307404C3307F702B70700819387071F67
:1001C000B307F70013850700EF008032930705009F
:1001D000639807040327C4FE9307404C3307F702D4
:1001E000B70700819387071FB307F700130744FD84
:1001F0009305070013850700EF0000278327C4FD40
:1002000013810700832784FD832544FD032604FE14
:10021000832644FE032784FE13850700EFF01FE4C6
:100220008327C4FE938717002326F4FE0327C4FE0A
:1002300093076000E3DCE7F6138109001300000078
:100240008320C1020324810213010103678000009F
:10025000130101FC232E1102232C8102130401043B
:100260002326A4FC2324B4FC2322C4FC2320D4FC96
:1002700013090100232604FE232404FE6F00C00896
:10028000B709FFFF33013101832784FE2328F4FCE3
:10029000832784FC232AF4FC93070100232CF4FC1D
:1002A000832744FC232EF4FC832704FC2320F4FE44
:1002B0008327C4FE2322F4FE0327C4FE9307404C89
:1002C0003307F702B70700819387071FB307F700CB
:1002D000130704FD9305070013850700EF00401284
:1002E0008327C4FE938717002326F4FE0327C4FE4A
:1002F0009307600063D4E700232604FE832784FE6F
:10030000938717002324F4FE032784FE8327C4FC6D
:10031000E368F7F613010900EFF09FE71300000010
:100320008320C103032481031301010467800000BB
:10033000130101FD2326810213040103232EA4FCD3
:10034000A30704FE6F00000593071000A307F4FE47
:10035000232404FE6F0040038347F4FE3707008127
:100360009306471E032784FE3387E60003470700F2
:10037000B3F7E700B337F000A307F4FE832784FE4A
:10038000938717002324F4FE832784FE0327C4FDEC
:10039000E3E4E7FC8347F4FE93C7170093F7F70FF6
:1003A000E39407FA130000000324C10213010103C0
:1003B00067800000130101FF2326810023247101BF
:1003C0001304010193870B00138507000324C10068
:1003D000832B810013010101678000009302050057
:1003E000130300009303700023A0620023A26200A5
:1003F00023A4620023A6720023A862006780000085
:100400009302050003A382001303130023A46200D8
:100410001383420183AE420093935E003303730063
:1004200003AE05002320C30103AE45002322C30110
:1004300003AE85002324C30103AEC5002326C301F8
:1004400003AE05012328C30103AE4501232AC301DE
:10045000938E1E00130F20036394EE01930E000091
:1004600023A2D201678000009302050003A382004B
:100470001303F3FF23A462001383420183AE02003F
:10048000930F2003138F0E00130F1F006314FF013F
:10049000130F000023A0E20193935E003303730067
:1004A000032E030023A0C501032E430023A2C50190
:1004B000032E830023A4C501032EC30023A6C50178
:1004C000032E030123A8C501032E430123AAC5015E
:1004D000678000009302050003A38200130500005B
:1004E000130E200363146E001305150067800000CF
:1004F0009302050003A3820013050000130E000001
:1005000063146E0013051500678000009302050058
:1005100003A3C20083A3020133B563006780000018
:10052000130141FF232011002322B1008345050060
:1005300063880500EF00C001130515006FF01FFF71
:1005400083200100832541001301C1006780000062
:10055000B708010023A0B80067800000130101FD67
:10056000232611022324810213040103232EA4FC59
:100570000327C4FD9307F00063E4E702B707008197
:100580000327C4FD1317270093874712B307F7000B
:1005900083A7070013850700EFF09FF86F0040075F
:1005A000930700022326F4FEA30504FE8327C4FE5E
:1005B0009387C7FF0327C4FDB357F70093F7F700EE
:1005C0002322F4FE832744FE63860700930710006E
:1005D000A305F4FE8347B4FE63820702B7070081D8
:1005E000032744FE1317270093874712B307F7002A
:1005F00083A7070013850700EFF09FF28327C4FE4F
:100600009387C7FF2326F4FE8327C4FEE340F0FA56
:100610008320C102032481021301010367800000CB
:10062000130101FD23261102232481021304010377
:10063000232EA4FC232CB4FC232AC4FC2328D4FCA2
:10064000B73700810327C4FD23AEE780B7370081A9
:100650009387C781032784FD23A2E700B737008172
:100660009387C781032744FD23A4E700B7370081A0
:100670009387C781032704FD23A6E700832704FD92
:1006800093D737002326F4FE832704FD93F77700E2
:10069000638807008327C4FE938717002326F4FE90
:1006A000B73700819387C7810327C4FE23A8E700DB
:1006B000B707008113850708EFF09FE60325C4FE06
:1006C000EFF0DFE9B70700811385C708EFF05FE5BA
:1006D000032704FD9307700063F2E702B737008138
:1006E0009386C781B70700801386C774930580007F
:1006F000032504FDEFF0DFB56F000002B73700817E
:100700009386C781B70700801386C774832504FDCD
:10071000032504FDEFF0DFB3032704FD930770000A
:1007200063F8E70013058000EFF09FC06F00C00082
:10073000032504FDEFF0DFBF130000008320C1029A
:10074000032481021301010367800000130101FAF1
:10075000232E1104232C8104130401062326A4FA5A
:100760002324B4FAEFF01FC5232CA4FC832784FDB7
:1007700083A70700232AF4FC832784FD83A747006F
:100780002328F4FC832784FD83A787002326F4FC19
:10079000832784FD83A707012326F4FE0327C4FED5
:1007A0008327C4FAB307F7022324F4FE8327C4FE89
:1007B000639A0700930710002326F4FE8327C4FAE8
:1007C0002324F4FE832784FD83A7C7002324F4FC9D
:1007D000232204FE6F004012232004FE232E04FC7B
:1007E0006F008007032784FA832784FCB307F7028E
:1007F0000327C4FDB307F7002322F4FC032784FC7E
:100800008327C4FDB307F702032784FEB307F7006D
:100810002320F4FC832744FC93972700032744FDFF
:10082000B307F70003A70700832704FC939727006B
:10083000832604FDB387F60083A70700B307F702FA
:10084000032704FEB307F7002320F4FE8327C4FD2B
:1008500093871700232EF4FC0327C4FD832784FC11
:10086000E362F7F8032784FA832784FC3307F7024F
:10087000832784FEB307F700232EF4FA032784FEB0
:10088000832784FCB337F70093F7F70F232CF4FA90
:10089000832784FB93B71700A30BF4FA834774FBF9
:1008A000138F0700B7170080938F478E6B200F00C0
:1008B0007B70FF018327C4FB939727000327C4FCA9
:1008C000B307F700032704FE23A0E700832784FE75
:1008D000938717002324F4FEB7170080138E878EAA
:1008E00067000E00130000006B300000832744FEF9
:1008F000938717002322F4FE832744FE0327C4FEB8
:10090000E3ECE7EC130000008320C105032481051C
:100910001301010667800000130101FE232E8100F0
:1009200013040102232604FE6F008004B737008100
:100930000327C4FE1317270093870783B307F70025
:100940001307300023A0E700B74700810327C4FE48
:100950001317270093870783B307F70013072000B7
:1009600023A0E7008327C4FE938717002326F4FE05
:100970000327C4FE9307F008E3DAE7FA1300000048
:100980000324C1011301010267800000130101FE6D
:10099000232E1100232C810013040102EFF0DFF756
:1009A0009306C000B757008113860783B7470081BD
:1009B00093850783B737008113850783EFF05FC600
:1009C000B70700811385070DEFF09FB5B7070081CA
:1009D0001385C70EEFF0DFB4232604FE6F008005F9
:1009E0000327C4FE9307C000B367F70263980700AC
:1009F000B70700811385870FEFF09FB2B7570081CB
:100A00000327C4FE1317270093870783B307F70054
:100A100083A7070013850700EFF05FB4B7070081D5
:100A20001385C70FEFF0DFAF8327C4FE938717004E
:100A30002326F4FE0327C4FE9307F008E3D2E7FA67
:100A4000B707008113850710EFF09FAD93070000F3
:100A5000138507008320C1010324810113010102D2
:040A600067800000AB
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
:10002000380000003900000061000000620000009C
:10003000630000006400000065000000660000002E
:1000400030000000310000003200000033000000EA
:1000500034000000350000003600000037000000CA
:10006000380000003900000061000000620000005C
:1000700063000000640000006500000066000000EE
:100080006F66667365743A20000000000A00000085
:10009000300000003100000032000000330000009A
:1000A000340000003500000036000000370000007A
:1000B000380000003900000061000000620000000C
:1000C000630000006400000065000000660000009E
:1000D0002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2D50
:1000E0002D2D2D2D2D2D2D2D2D0A000046494E4153
:1000F0004C205A0A000000000A0000002000000006
:100100000A2D2D2D2D2D2D2D2D2D2D2D2D2D2D2D42
:100110002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2D0F
:020120000A00D3
:100124000000008104000081080000810C000081AF
:100134001000008114000081180000811C0000815F
:100144002000008124000081280000812C0000810F
:100154003000008134000081380000813C000081BF
:100164004000008144000081480000814C0000816F
:100174005000008154000081580000815C0000811F
:100184006000008164000081680000816C000081CF
:100194007000008174000081780000817C0000817F
:1001A4009000008194000081980000819C000081EF
:1001B400A0000081A4000081A8000081AC0000819F
:1001C400B0000081B4000081B8000081BC0000814F
:1001D400C0000081C4000081C8000081CC000081FF
:040000058000000077
:00000001FF

View file

@ -0,0 +1,94 @@
#include "vx_front.h"
// -------------------------- Matrix Multiplication --------------------------
// Argument record for the matrix-multiply kernel. vx_sq_mat_mult() fills it in
// and passes its address to vx_spawnWarps(); being static, it is private to
// this translation unit.
static mat_mult_arg_t args;
// Kernel function handed to vx_spawnWarps(); its definition names the two
// unsigned parameters (tid, wid).
void _vx_matMult(unsigned, unsigned);
/*
 * Multiply two square matrices: z = x * y.
 *
 * x, y    : input matrices, stored into the shared argument record as-is.
 * z       : output matrix, stored into the shared argument record as-is.
 * mat_dim : number of rows/columns of each matrix.
 *
 * Fills the file-local `args` record, prints the per-thread column count
 * ("offset") for diagnostics, spawns mat_dim warps running _vx_matMult and
 * then waits for them.
 */
void vx_sq_mat_mult(void * x, void * y, void * z, unsigned mat_dim)
{
	/* Columns each thread handles: mat_dim / MAX_THREADS, rounded up. */
	const unsigned cols_per_thread =
		(mat_dim / MAX_THREADS) + (((mat_dim % MAX_THREADS) != 0) ? 1 : 0);

	args.x       = x;
	args.y       = y;
	args.z       = z;
	args.mat_dim = mat_dim;
	args.offset  = cols_per_thread;

	vx_print_str("offset: ");
	vx_print_hex(cols_per_thread);
	vx_print_str("\n");

	/* Small matrices use one thread per column; larger ones use MAX_THREADS. */
	const unsigned threads_per_warp = (mat_dim >= 8) ? MAX_THREADS : mat_dim;
	vx_spawnWarps(mat_dim, threads_per_warp, _vx_matMult, (void *) (&args));

	/* Wait on MAX_WARPS completions for large matrices, otherwise on mat_dim. */
	const unsigned warps_to_wait = (mat_dim > 7) ? MAX_WARPS : mat_dim;
	vx_wait_for_warps(warps_to_wait);
}
// Matrix-multiply kernel passed to vx_spawnWarps() by vx_sq_mat_mult().
//
// Treating x, y and z as row-major mat_dim x mat_dim arrays, each call computes
// up to `offset` consecutive elements of row `wid` of z = x * y, starting at
// column offset * tid.
//   tid : selects the starting column (presumably the thread index within the
//         warp -- confirm against the spawning code).
//   wid : selects the row of x and z that is processed.
void _vx_matMult(unsigned tid, unsigned wid)
{
// The argument record is not a parameter; it is fetched through vx_get_arg_struct().
mat_mult_arg_t * args = (mat_mult_arg_t *) vx_get_arg_struct();
unsigned * x_ptr = args->x;
unsigned * y_ptr = args->y;
unsigned * z_ptr = args->z;
// off: number of columns this thread handles; i_index: current column.
unsigned off = args->offset;
unsigned i_index = off * tid;
// Fallback for a zero offset: handle exactly one column, the one equal to tid.
if (off == 0)
{
off = 1;
i_index = tid;
}
unsigned mat_dim = args->mat_dim;
// NOTE(review): `iter` is int while `off` is unsigned, so this comparison is
// performed in unsigned arithmetic.
for (int iter = 0; iter < off; ++iter)
{
// Dot product of row `wid` of x with column `i_index` of y.
unsigned total = 0;
for (unsigned place = 0; place < mat_dim; ++place)
{
unsigned x_i = (wid * mat_dim) + place;
unsigned y_i = (mat_dim * place) + i_index;
total += (x_ptr[x_i] * y_ptr[y_i]);
}
// The dot product above is computed before the column bound is checked, so
// y_ptr may be read with i_index >= mat_dim; only the store is guarded.
int final_i = (wid * mat_dim) + i_index;
unsigned cond = i_index < mat_dim;
// __if / __else / __end_if are macros from vx_back.h; presumably they
// implement per-thread divergence and re-join -- confirm there.
__if(cond)
{
// Store the result and advance to the next column; i_index is advanced only
// when the column was in range.
z_ptr[final_i] = total;
i_index++;
}
__else
__end_if
}
return;
}

View file

@ -0,0 +1,20 @@
#include "../vx_os/vx_back/vx_back.h"
#include "../vx_os/vx_io/vx_io.h"
// Limits used by vx_sq_mat_mult(): MAX_THREADS is the thread count passed to
// vx_spawnWarps() for matrices of dimension 8 or more, and MAX_WARPS is the
// count passed to vx_wait_for_warps() in that case.
#define MAX_WARPS 8
#define MAX_THREADS 8
// -------------------------- Matrix Multiplication --------------------------
// Argument record shared between vx_sq_mat_mult() and the _vx_matMult kernel.
typedef struct
{
// Left operand; read by the kernel.
unsigned * x;
// Right operand; read by the kernel.
unsigned * y;
// Result matrix; written by the kernel.
unsigned * z;
// Number of rows/columns of each (square) matrix.
unsigned mat_dim;
// Columns handled per thread: mat_dim / MAX_THREADS rounded up, as computed
// by vx_sq_mat_mult().
unsigned offset;
} mat_mult_arg_t;
// Computes z = x * y for square matrices; arguments are (x, y, z, mat_dim).
void vx_sq_mat_mult(void *, void *, void *, unsigned);

View file

@ -0,0 +1,38 @@
#include "./vx_include/vx_front.h"
// Operand matrices (x, y) and result matrix (z). Each array holds 1024
// elements, but only the first MAT_DIM * MAT_DIM entries are initialised and
// printed by this program.
unsigned x[1024] = {0};
unsigned y[1024] = {0};
unsigned z[1024] = {0};
// Dimension of the square matrices used by this test.
#define MAT_DIM 16
/*
 * Fill the first MAT_DIM * MAT_DIM entries of the global operand matrices:
 * every element of x becomes 3 and every element of y becomes 2.
 */
void initialize_mats()
{
	const int elem_count = MAT_DIM * MAT_DIM;
	int idx = 0;

	while (idx < elem_count)
	{
		x[idx] = 3;
		y[idx] = 2;
		++idx;
	}
}
/*
 * Test driver: initialise x and y, compute z = x * y with vx_sq_mat_mult(),
 * then print z in hexadecimal, one matrix row per output line.
 * Always returns 0.
 */
int main()
{
	const int elem_count = MAT_DIM * MAT_DIM;
	int idx;

	initialize_mats();
	vx_sq_mat_mult(x, y, z, MAT_DIM);

	vx_print_str("-------------------------\n");
	vx_print_str("FINAL Z\n");

	idx = 0;
	while (idx < elem_count)
	{
		/* Break the line before the first element of every row. */
		if ((idx % MAT_DIM) == 0)
		{
			vx_print_str("\n");
		}

		vx_print_hex(z[idx]);
		vx_print_str(" ");
		idx++;
	}

	vx_print_str("\n-------------------------------\n");

	return 0;
}

BIN
src/vortex_software/vx_os/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -1,32 +1,18 @@
#include "lib.h"
#include "vx_back.h"
extern void createThreads(unsigned, unsigned, unsigned, void *, unsigned);
extern void wspawn(unsigned, unsigned, unsigned, void *, unsigned);
extern void print_consol(char *);
extern void printc(char);
extern void vx_createThreads(unsigned, unsigned, unsigned, void *, unsigned);
extern void vx_wspawn(unsigned, unsigned, unsigned, void *, unsigned);
void int_print(unsigned f)
void vx_before_main()
{
if (f < 16)
for (int i = 0; i < 8; i++)
{
print_consol(hextoa[f]);
return;
queue_initialize(q + i);
}
int temp;
int sf = 32;
bool start = false;
do
{
temp = (f >> (sf - 4)) & 0xf;
if (temp != 0) start = true;
if (start) print_consol(hextoa[temp]);
sf -= 4;
} while(sf > 0);
}
void reschedule_warps()
void vx_reschedule_warps()
{
register unsigned curr_warp asm("s10");
@ -40,13 +26,13 @@ void reschedule_warps()
Job j;
queue_dequeue(q+curr_warp,&j);
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
vx_createThreads(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
ECALL;
}
void schedule_warps()
void vx_schedule_warps()
{
asm __volatile__("mv s3, sp");
@ -57,7 +43,7 @@ void schedule_warps()
Job j;
queue_dequeue(q+curr_warp,&j);
asm __volatile__("mv sp,%0"::"r" (j.base_sp):);
wspawn(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
vx_wspawn(j.n_threads, j.wid, j.func_ptr, j.args, j.assigned_warp);
}
}
@ -65,14 +51,9 @@ void schedule_warps()
}
void sleep(int t)
{
for(int z = 0; z < t; z++) {}
}
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
{
asm __volatile__("addi s2, sp, 0");
int warp = 0;
@ -97,11 +78,11 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void * args)
asm __volatile__("addi sp, s2, 0");
schedule_warps();
vx_schedule_warps();
}
void wait_for_done(unsigned num_wait)
void vx_wait_for_warps(unsigned num_wait)
{
bool temp = false;
while (!temp)
@ -115,19 +96,11 @@ void wait_for_done(unsigned num_wait)
}
void * get_1st_arg(void)
void * vx_get_arg_struct(void)
{
register void *ret asm("s7");
return ret;
}
void * get_2nd_arg(void)
{
register void *ret asm("s8");
return ret;
}
void * get_3rd_arg(void)
{
register void *ret asm("s9");
return ret;
}

View file

@ -1,8 +1,9 @@
#ifndef __RISCV_GP_
#define __RISCV_GP_
#pragma once
#include <stdbool.h>
#include "queue.h"
#include "../vx_util/queue.h"
#define WSPAWN asm __volatile__(".word 0x3006b"::);
#define CLONE asm __volatile__(".word 0x3506b":::);
@ -28,28 +29,15 @@
#define __end_if AFTER:\
JOIN;
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
static bool done[] = {false, false, false, false, false, false, false};
static int main_sp[1];
#define FUNC void (func)(unsigned, unsigned)
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *);
void reschedule_warps(void);
void int_print(unsigned);
void wait_for_done(unsigned);
void * get_1st_arg(void);
void * get_2nd_arg(void);
void * get_3rd_arg(void);
void sleep(int);
void vx_spawnWarps(unsigned num_Warps, unsigned num_threads, FUNC, void *);
void vx_schedule_warps(void);
void vx_reschedule_warps(void);
void vx_wait_for_warps(unsigned);
void * vx_get_arg_struct(void);
#endif

View file

@ -7,12 +7,13 @@
.global _start
_start:
lui sp, 0x7ffff
jal vx_before_main
jal main
ecall
.type createThreads, @function
.global createThreads
createThreads:
.type vx_createThreads, @function
.global vx_createThreads
vx_createThreads:
mv s7 ,a3 # Moving args to s7
mv s10,a4 # Moving assigned_warp to s10
mv t5 ,sp # Saving the current stack pointer to t5
@ -34,51 +35,14 @@ loop_done:
mv t6,a2 # setting func_addr
mv s11,t2 # setting num_threads to spawn
.word 0x1bfe0eb
la a0, reschedule_warps
la a0, vx_reschedule_warps
.word 0x5406b
.type printc, @function
.global printc
printc:
la a7, 0x00010000
sw a1, 0(a7)
ret
.type wspawn, @function
.global wspawn
wspawn:
la t1, createThreads
.type vx_wspawn, @function
.global vx_wspawn
vx_wspawn:
la t1, vx_createThreads
.word 0x3006b # WSPAWN instruction
ret
.type print_consol, @function
.global print_consol
print_consol:
addi sp, sp, -12
sw ra, 0(sp)
sw a1, 4(sp)
bl:
lbu a1,0(a0)
beqz a1,be
jal printc
addi a0, a0, 1
j bl
be:
lw ra, 0(sp)
lw a1, 4(sp)
addi sp, sp, 12
ret
.type print_int, @function
.global print_int
print_int:
addi sp, sp, -12
sw ra, 0(sp)
sw a1, 4(sp)
addi a1, a0, 48
jal printc
lw ra, 0(sp)
lw a1, 4(sp)
addi sp, sp, 12
ret

Binary file not shown.

View file

@ -0,0 +1,21 @@
#include "vx_io.h"
/*
 * Print an unsigned value in lowercase hexadecimal, without a prefix and
 * without leading zeros, by emitting one digit string at a time through
 * vx_print_str().
 *
 * f : value to print.
 */
void vx_print_hex(unsigned f)
{
	/* A single digit (this also covers zero) is printed directly. */
	if (f < 16)
	{
		vx_print_str(hextoa[f]);
		return;
	}

	/* Walk the nibbles from bit 28 down to bit 0, skipping leading zeros. */
	bool seen_nonzero = false;
	for (int shift = 28; shift >= 0; shift -= 4)
	{
		int nibble = (f >> shift) & 0xf;

		if (nibble != 0)
		{
			seen_nonzero = true;
		}
		if (seen_nonzero)
		{
			vx_print_str(hextoa[nibble]);
		}
	}
}

View file

@ -0,0 +1,8 @@
#pragma once
#include <stdbool.h>
// Lookup table from a nibble value (0..15) to its one-character lowercase hex
// string. NOTE(review): defined `static` in a header, so every translation
// unit that includes this file gets its own copy.
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
// Prints an unsigned value in hexadecimal (implemented in vx_io.c).
void vx_print_hex(unsigned);
// Prints a zero-terminated string (implemented in assembly in vx_io.s).
void vx_print_str(char *);

View file

@ -0,0 +1,30 @@
# vx_print_str: print a zero-terminated byte string.
#   in : a0 = address of the string
# Each byte is loaded into a1 and handed to vx_printc until a zero byte is
# found. ra and the caller's a1 are saved on the stack and restored before
# returning; a0 is left pointing at the terminating zero byte.
.type vx_print_str, @function
.global vx_print_str
vx_print_str:
# Reserve 12 bytes of stack; only offsets 0 (ra) and 4 (a1) are used.
# NOTE(review): 12 is not a multiple of 16 -- check against the stack
# alignment the ABI in use expects.
addi sp, sp, -12
sw ra, 0(sp)
sw a1, 4(sp)
# bl: loop head -- load the next byte (zero-extended) and stop on zero.
bl:
lbu a1,0(a0)
beqz a1,be
# vx_printc takes the character in a1.
jal vx_printc
addi a0, a0, 1
j bl
# be: loop exit -- restore the saved registers and release the stack space.
be:
lw ra, 0(sp)
lw a1, 4(sp)
addi sp, sp, 12
ret
# vx_printc: emit one character.
#   in : a1 = character to output (note: a1, not a0)
# Stores the word in a1 to address 0x00010000 (presumably a memory-mapped
# console output port -- confirm against the simulator) and returns.
# Overwrites a7, which is used as the address register.
.type vx_printc, @function
.global vx_printc
vx_printc:
# NOTE(review): `la` is used here with a numeric constant rather than a symbol.
la a7, 0x00010000
sw a1, 0(a7)
ret

Binary file not shown.