mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
MWMT tested + minor opt
This commit is contained in:
parent
6c493cc4de
commit
6935d52c39
10 changed files with 315 additions and 295 deletions
|
@ -256,6 +256,7 @@ int emu_main(int argc, char **argv) {
|
|||
mu.attach(console, 1ll<<(arch.getWordSize()*8 - 1));
|
||||
// mu.attach(console, 0xf0000000);
|
||||
|
||||
// core.w[0].pc = 0x8000007c; // If I want to start at a specific location
|
||||
std::cout << "ABOUT TO START\n";
|
||||
while (core.running()) { console.poll(); core.step(); }
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ namespace Harp {
|
|||
nRegs = 32;
|
||||
nPRegs = 0;
|
||||
nThds = 8;
|
||||
nWarps = 3;
|
||||
nWarps = 8;
|
||||
|
||||
extent = EXT_WARPS;
|
||||
|
||||
|
|
|
@ -3,21 +3,53 @@ start
|
|||
ABOUT TO START
|
||||
INTERRUPT ECALL/EBREAK
|
||||
INTERRUPT ECALL/EBREAK
|
||||
Total steps: 274
|
||||
Total insts: 1561
|
||||
INTERRUPT ECALL/EBREAK
|
||||
INTERRUPT ECALL/EBREAK
|
||||
Total steps: 300
|
||||
Total insts: 1503
|
||||
=== Warp 0 ===
|
||||
Steps : 274
|
||||
Insts : 827
|
||||
Steps : 300
|
||||
Insts : 504
|
||||
Loads : 0
|
||||
Stores: 177
|
||||
Stores: 112
|
||||
GRADE: FAILED 0
|
||||
=== Warp 1 ===
|
||||
Steps : 181
|
||||
Insts : 734
|
||||
Steps : 129
|
||||
Insts : 333
|
||||
Loads : 0
|
||||
Stores: 157
|
||||
Stores: 76
|
||||
GRADE: FAILED 0
|
||||
=== Warp 2 ===
|
||||
Steps : 129
|
||||
Insts : 333
|
||||
Loads : 0
|
||||
Stores: 76
|
||||
GRADE: FAILED 0
|
||||
=== Warp 3 ===
|
||||
Steps : 129
|
||||
Insts : 333
|
||||
Loads : 0
|
||||
Stores: 76
|
||||
GRADE: FAILED 0
|
||||
=== Warp 4 ===
|
||||
Steps : 0
|
||||
Insts : 0
|
||||
Loads : 0
|
||||
Stores: 0
|
||||
GRADE: FAILED 0
|
||||
=== Warp 5 ===
|
||||
Steps : 0
|
||||
Insts : 0
|
||||
Loads : 0
|
||||
Stores: 0
|
||||
GRADE: FAILED 0
|
||||
=== Warp 6 ===
|
||||
Steps : 0
|
||||
Insts : 0
|
||||
Loads : 0
|
||||
Stores: 0
|
||||
GRADE: FAILED 0
|
||||
=== Warp 7 ===
|
||||
Steps : 0
|
||||
Insts : 0
|
||||
Loads : 0
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
|
||||
int main(void);
|
||||
void matAddition ();
|
||||
void matAddition (unsigned, unsigned);
|
||||
|
||||
#include "./lib/lib.h"
|
||||
|
||||
|
@ -13,8 +13,8 @@ unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9};
|
|||
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2};
|
||||
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
#define NUM_WARPS 2
|
||||
#define NUM_THREADS 8
|
||||
#define NUM_WARPS 4
|
||||
#define NUM_THREADS 4
|
||||
|
||||
int main()
|
||||
{
|
||||
|
@ -27,14 +27,13 @@ int main()
|
|||
}
|
||||
|
||||
|
||||
void matAddition(unsigned tid)
|
||||
void matAddition(unsigned tid, unsigned wid)
|
||||
{
|
||||
unsigned wid = get_wid();
|
||||
unsigned * x_ptr = get_1st_arg();
|
||||
unsigned * y_ptr = get_2nd_arg();
|
||||
unsigned * z_ptr = get_3rd_arg();
|
||||
|
||||
unsigned i = (wid * 8) + tid;
|
||||
unsigned i = (wid * NUM_THREADS) + tid;
|
||||
|
||||
z_ptr[i] = x_ptr[i] + y_ptr[i];
|
||||
}
|
|
@ -9,7 +9,7 @@ Disassembly of section .text:
|
|||
80000004: 00112623 sw ra,12(sp)
|
||||
80000008: 00812423 sw s0,8(sp)
|
||||
8000000c: 01010413 addi s0,sp,16
|
||||
80000010: 354000ef jal ra,80000364 <initiate_stack>
|
||||
80000010: 348000ef jal ra,80000358 <initiate_stack>
|
||||
80000014: 810007b7 lui a5,0x81000
|
||||
80000018: 00078793 mv a5,a5
|
||||
8000001c: 81000737 lui a4,0x81000
|
||||
|
@ -18,9 +18,9 @@ Disassembly of section .text:
|
|||
80000028: 04068693 addi a3,a3,64 # 81000040 <y+0xffffffc0>
|
||||
8000002c: 80000637 lui a2,0x80000
|
||||
80000030: 05860613 addi a2,a2,88 # 80000058 <y+0xfeffffd8>
|
||||
80000034: 00800593 li a1,8
|
||||
80000038: 00200513 li a0,2
|
||||
8000003c: 1e0000ef jal ra,8000021c <createWarps>
|
||||
80000034: 00400593 li a1,4
|
||||
80000038: 00400513 li a0,4
|
||||
8000003c: 1d4000ef jal ra,80000210 <createWarps>
|
||||
80000040: 00000793 li a5,0
|
||||
80000044: 00078513 mv a0,a5
|
||||
80000048: 00c12083 lw ra,12(sp)
|
||||
|
@ -29,225 +29,222 @@ Disassembly of section .text:
|
|||
80000054: 00008067 ret
|
||||
|
||||
80000058 <matAddition>:
|
||||
80000058: fc010113 addi sp,sp,-64
|
||||
8000005c: 02112e23 sw ra,60(sp)
|
||||
80000060: 02812c23 sw s0,56(sp)
|
||||
80000064: 04010413 addi s0,sp,64
|
||||
80000068: fca42623 sw a0,-52(s0)
|
||||
8000006c: 258000ef jal ra,800002c4 <get_wid>
|
||||
80000070: fea42623 sw a0,-20(s0)
|
||||
80000074: 278000ef jal ra,800002ec <get_1st_arg>
|
||||
80000078: fea42423 sw a0,-24(s0)
|
||||
8000007c: 298000ef jal ra,80000314 <get_2nd_arg>
|
||||
80000080: fea42223 sw a0,-28(s0)
|
||||
80000084: 2b8000ef jal ra,8000033c <get_3rd_arg>
|
||||
80000088: fea42023 sw a0,-32(s0)
|
||||
8000008c: fec42783 lw a5,-20(s0)
|
||||
80000090: 00379793 slli a5,a5,0x3
|
||||
80000094: fcc42703 lw a4,-52(s0)
|
||||
80000098: 00f707b3 add a5,a4,a5
|
||||
8000009c: fcf42e23 sw a5,-36(s0)
|
||||
800000a0: fdc42783 lw a5,-36(s0)
|
||||
800000a4: 00279793 slli a5,a5,0x2
|
||||
800000a8: fe842703 lw a4,-24(s0)
|
||||
800000ac: 00f707b3 add a5,a4,a5
|
||||
800000b0: 0007a683 lw a3,0(a5) # 81000000 <y+0xffffff80>
|
||||
800000b4: fdc42783 lw a5,-36(s0)
|
||||
800000b8: 00279793 slli a5,a5,0x2
|
||||
800000bc: fe442703 lw a4,-28(s0)
|
||||
800000c0: 00f707b3 add a5,a4,a5
|
||||
800000c4: 0007a703 lw a4,0(a5)
|
||||
800000c8: fdc42783 lw a5,-36(s0)
|
||||
800000cc: 00279793 slli a5,a5,0x2
|
||||
800000d0: fe042603 lw a2,-32(s0)
|
||||
800000d4: 00f607b3 add a5,a2,a5
|
||||
800000d8: 00e68733 add a4,a3,a4
|
||||
800000dc: 00e7a023 sw a4,0(a5)
|
||||
800000e0: 00000013 nop
|
||||
800000e4: 03c12083 lw ra,60(sp)
|
||||
800000e8: 03812403 lw s0,56(sp)
|
||||
800000ec: 04010113 addi sp,sp,64
|
||||
800000f0: 00008067 ret
|
||||
80000058: fd010113 addi sp,sp,-48
|
||||
8000005c: 02112623 sw ra,44(sp)
|
||||
80000060: 02812423 sw s0,40(sp)
|
||||
80000064: 03010413 addi s0,sp,48
|
||||
80000068: fca42e23 sw a0,-36(s0)
|
||||
8000006c: fcb42c23 sw a1,-40(s0)
|
||||
80000070: 270000ef jal ra,800002e0 <get_1st_arg>
|
||||
80000074: fea42623 sw a0,-20(s0)
|
||||
80000078: 290000ef jal ra,80000308 <get_2nd_arg>
|
||||
8000007c: fea42423 sw a0,-24(s0)
|
||||
80000080: 2b0000ef jal ra,80000330 <get_3rd_arg>
|
||||
80000084: fea42223 sw a0,-28(s0)
|
||||
80000088: fd842783 lw a5,-40(s0)
|
||||
8000008c: 00279793 slli a5,a5,0x2
|
||||
80000090: fdc42703 lw a4,-36(s0)
|
||||
80000094: 00f707b3 add a5,a4,a5
|
||||
80000098: fef42023 sw a5,-32(s0)
|
||||
8000009c: fe042783 lw a5,-32(s0)
|
||||
800000a0: 00279793 slli a5,a5,0x2
|
||||
800000a4: fec42703 lw a4,-20(s0)
|
||||
800000a8: 00f707b3 add a5,a4,a5
|
||||
800000ac: 0007a683 lw a3,0(a5) # 81000000 <y+0xffffff80>
|
||||
800000b0: fe042783 lw a5,-32(s0)
|
||||
800000b4: 00279793 slli a5,a5,0x2
|
||||
800000b8: fe842703 lw a4,-24(s0)
|
||||
800000bc: 00f707b3 add a5,a4,a5
|
||||
800000c0: 0007a703 lw a4,0(a5)
|
||||
800000c4: fe042783 lw a5,-32(s0)
|
||||
800000c8: 00279793 slli a5,a5,0x2
|
||||
800000cc: fe442603 lw a2,-28(s0)
|
||||
800000d0: 00f607b3 add a5,a2,a5
|
||||
800000d4: 00e68733 add a4,a3,a4
|
||||
800000d8: 00e7a023 sw a4,0(a5)
|
||||
800000dc: 00000013 nop
|
||||
800000e0: 02c12083 lw ra,44(sp)
|
||||
800000e4: 02812403 lw s0,40(sp)
|
||||
800000e8: 03010113 addi sp,sp,48
|
||||
800000ec: 00008067 ret
|
||||
|
||||
800000f4 <createThreads>:
|
||||
800000f4: fb010113 addi sp,sp,-80
|
||||
800000f8: 04812623 sw s0,76(sp)
|
||||
800000fc: 04912423 sw s1,72(sp)
|
||||
80000100: 05212223 sw s2,68(sp)
|
||||
80000104: 05312023 sw s3,64(sp)
|
||||
80000108: 03412e23 sw s4,60(sp)
|
||||
8000010c: 03a12c23 sw s10,56(sp)
|
||||
80000110: 03b12a23 sw s11,52(sp)
|
||||
80000114: 05010413 addi s0,sp,80
|
||||
80000118: fca42623 sw a0,-52(s0)
|
||||
8000011c: fcb42423 sw a1,-56(s0)
|
||||
80000120: fcc42223 sw a2,-60(s0)
|
||||
80000124: fcd42023 sw a3,-64(s0)
|
||||
80000128: fae42e23 sw a4,-68(s0)
|
||||
8000012c: faf42c23 sw a5,-72(s0)
|
||||
80000130: fc042903 lw s2,-64(s0)
|
||||
80000134: fbc42983 lw s3,-68(s0)
|
||||
80000138: fb842a03 lw s4,-72(s0)
|
||||
8000013c: fc842483 lw s1,-56(s0)
|
||||
80000140: 00010f13 mv t5,sp
|
||||
80000144: 00100793 li a5,1
|
||||
80000148: fcf42e23 sw a5,-36(s0)
|
||||
8000014c: 0200006f j 8000016c <createThreads+0x78>
|
||||
80000150: fdc42503 lw a0,-36(s0)
|
||||
80000154: fdc42303 lw t1,-36(s0)
|
||||
80000158: f0010113 addi sp,sp,-256
|
||||
8000015c: 0003506b 0x3506b
|
||||
80000160: fdc42783 lw a5,-36(s0)
|
||||
80000164: 00178793 addi a5,a5,1
|
||||
80000168: fcf42e23 sw a5,-36(s0)
|
||||
8000016c: fdc42703 lw a4,-36(s0)
|
||||
80000170: fcc42783 lw a5,-52(s0)
|
||||
80000174: fcf76ee3 bltu a4,a5,80000150 <createThreads+0x5c>
|
||||
80000178: 000f0113 mv sp,t5
|
||||
8000017c: 00000513 li a0,0
|
||||
80000180: fc442f83 lw t6,-60(s0)
|
||||
80000184: fcc42d83 lw s11,-52(s0)
|
||||
80000188: 01bfe0eb 0x1bfe0eb
|
||||
8000018c: 00000073 ecall
|
||||
80000190: 00000013 nop
|
||||
80000194: 04c12403 lw s0,76(sp)
|
||||
80000198: 04812483 lw s1,72(sp)
|
||||
8000019c: 04412903 lw s2,68(sp)
|
||||
800001a0: 04012983 lw s3,64(sp)
|
||||
800001a4: 03c12a03 lw s4,60(sp)
|
||||
800001a8: 03812d03 lw s10,56(sp)
|
||||
800001ac: 03412d83 lw s11,52(sp)
|
||||
800001b0: 05010113 addi sp,sp,80
|
||||
800001b4: 00008067 ret
|
||||
800000f0 <createThreads>:
|
||||
800000f0: fb010113 addi sp,sp,-80
|
||||
800000f4: 04812623 sw s0,76(sp)
|
||||
800000f8: 05212423 sw s2,72(sp)
|
||||
800000fc: 05312223 sw s3,68(sp)
|
||||
80000100: 05412023 sw s4,64(sp)
|
||||
80000104: 03a12e23 sw s10,60(sp)
|
||||
80000108: 03b12c23 sw s11,56(sp)
|
||||
8000010c: 05010413 addi s0,sp,80
|
||||
80000110: fca42623 sw a0,-52(s0)
|
||||
80000114: fcb42423 sw a1,-56(s0)
|
||||
80000118: fcc42223 sw a2,-60(s0)
|
||||
8000011c: fcd42023 sw a3,-64(s0)
|
||||
80000120: fae42e23 sw a4,-68(s0)
|
||||
80000124: faf42c23 sw a5,-72(s0)
|
||||
80000128: fc042903 lw s2,-64(s0)
|
||||
8000012c: fbc42983 lw s3,-68(s0)
|
||||
80000130: fb842a03 lw s4,-72(s0)
|
||||
80000134: fc842583 lw a1,-56(s0)
|
||||
80000138: 00010f13 mv t5,sp
|
||||
8000013c: 00100793 li a5,1
|
||||
80000140: fcf42e23 sw a5,-36(s0)
|
||||
80000144: 0200006f j 80000164 <createThreads+0x74>
|
||||
80000148: fdc42503 lw a0,-36(s0)
|
||||
8000014c: fdc42303 lw t1,-36(s0)
|
||||
80000150: f0010113 addi sp,sp,-256
|
||||
80000154: 0003506b 0x3506b
|
||||
80000158: fdc42783 lw a5,-36(s0)
|
||||
8000015c: 00178793 addi a5,a5,1
|
||||
80000160: fcf42e23 sw a5,-36(s0)
|
||||
80000164: fdc42703 lw a4,-36(s0)
|
||||
80000168: fcc42783 lw a5,-52(s0)
|
||||
8000016c: fcf76ee3 bltu a4,a5,80000148 <createThreads+0x58>
|
||||
80000170: 000f0113 mv sp,t5
|
||||
80000174: 00000513 li a0,0
|
||||
80000178: fc442f83 lw t6,-60(s0)
|
||||
8000017c: fcc42d83 lw s11,-52(s0)
|
||||
80000180: 01bfe0eb 0x1bfe0eb
|
||||
80000184: 00000073 ecall
|
||||
80000188: 00000013 nop
|
||||
8000018c: 04c12403 lw s0,76(sp)
|
||||
80000190: 04812903 lw s2,72(sp)
|
||||
80000194: 04412983 lw s3,68(sp)
|
||||
80000198: 04012a03 lw s4,64(sp)
|
||||
8000019c: 03c12d03 lw s10,60(sp)
|
||||
800001a0: 03812d83 lw s11,56(sp)
|
||||
800001a4: 05010113 addi sp,sp,80
|
||||
800001a8: 00008067 ret
|
||||
|
||||
800001b8 <wspawn>:
|
||||
800001b8: fd010113 addi sp,sp,-48
|
||||
800001bc: 02812623 sw s0,44(sp)
|
||||
800001c0: 03010413 addi s0,sp,48
|
||||
800001c4: fea42623 sw a0,-20(s0)
|
||||
800001c8: feb42423 sw a1,-24(s0)
|
||||
800001cc: fec42223 sw a2,-28(s0)
|
||||
800001d0: fed42023 sw a3,-32(s0)
|
||||
800001d4: fce42e23 sw a4,-36(s0)
|
||||
800001d8: fcf42c23 sw a5,-40(s0)
|
||||
800001dc: fd842383 lw t2,-40(s0)
|
||||
800001e0: 800007b7 lui a5,0x80000
|
||||
800001e4: 0f478793 addi a5,a5,244 # 800000f4 <y+0xff000074>
|
||||
800001e8: 00078313 mv t1,a5
|
||||
800001ec: fec42503 lw a0,-20(s0)
|
||||
800001f0: fe842583 lw a1,-24(s0)
|
||||
800001f4: fe442783 lw a5,-28(s0)
|
||||
800001f8: 00078613 mv a2,a5
|
||||
800001fc: fe042683 lw a3,-32(s0)
|
||||
80000200: fdc42703 lw a4,-36(s0)
|
||||
80000204: 00038793 mv a5,t2
|
||||
80000208: 0003006b 0x3006b
|
||||
8000020c: 00000013 nop
|
||||
80000210: 02c12403 lw s0,44(sp)
|
||||
80000214: 03010113 addi sp,sp,48
|
||||
80000218: 00008067 ret
|
||||
800001ac <wspawn>:
|
||||
800001ac: fd010113 addi sp,sp,-48
|
||||
800001b0: 02812623 sw s0,44(sp)
|
||||
800001b4: 03010413 addi s0,sp,48
|
||||
800001b8: fea42623 sw a0,-20(s0)
|
||||
800001bc: feb42423 sw a1,-24(s0)
|
||||
800001c0: fec42223 sw a2,-28(s0)
|
||||
800001c4: fed42023 sw a3,-32(s0)
|
||||
800001c8: fce42e23 sw a4,-36(s0)
|
||||
800001cc: fcf42c23 sw a5,-40(s0)
|
||||
800001d0: fd842383 lw t2,-40(s0)
|
||||
800001d4: 800007b7 lui a5,0x80000
|
||||
800001d8: 0f078793 addi a5,a5,240 # 800000f0 <y+0xff000070>
|
||||
800001dc: 00078313 mv t1,a5
|
||||
800001e0: fec42503 lw a0,-20(s0)
|
||||
800001e4: fe842583 lw a1,-24(s0)
|
||||
800001e8: fe442783 lw a5,-28(s0)
|
||||
800001ec: 00078613 mv a2,a5
|
||||
800001f0: fe042683 lw a3,-32(s0)
|
||||
800001f4: fdc42703 lw a4,-36(s0)
|
||||
800001f8: 00038793 mv a5,t2
|
||||
800001fc: 0003006b 0x3006b
|
||||
80000200: 00000013 nop
|
||||
80000204: 02c12403 lw s0,44(sp)
|
||||
80000208: 03010113 addi sp,sp,48
|
||||
8000020c: 00008067 ret
|
||||
|
||||
8000021c <createWarps>:
|
||||
8000021c: fc010113 addi sp,sp,-64
|
||||
80000220: 02112e23 sw ra,60(sp)
|
||||
80000224: 02812c23 sw s0,56(sp)
|
||||
80000228: 04010413 addi s0,sp,64
|
||||
8000022c: fca42e23 sw a0,-36(s0)
|
||||
80000230: fcb42c23 sw a1,-40(s0)
|
||||
80000234: fcc42a23 sw a2,-44(s0)
|
||||
80000238: fcd42823 sw a3,-48(s0)
|
||||
8000023c: fce42623 sw a4,-52(s0)
|
||||
80000240: fcf42423 sw a5,-56(s0)
|
||||
80000244: 00010f13 mv t5,sp
|
||||
80000248: 00100793 li a5,1
|
||||
8000024c: fef42623 sw a5,-20(s0)
|
||||
80000250: 0300006f j 80000280 <createWarps+0x64>
|
||||
80000254: 80010113 addi sp,sp,-2048
|
||||
80000258: fc842783 lw a5,-56(s0)
|
||||
8000025c: fcc42703 lw a4,-52(s0)
|
||||
80000260: fd042683 lw a3,-48(s0)
|
||||
80000264: fd442603 lw a2,-44(s0)
|
||||
80000268: fec42583 lw a1,-20(s0)
|
||||
8000026c: fd842503 lw a0,-40(s0)
|
||||
80000270: f49ff0ef jal ra,800001b8 <wspawn>
|
||||
80000274: fec42783 lw a5,-20(s0)
|
||||
80000278: 00178793 addi a5,a5,1
|
||||
8000027c: fef42623 sw a5,-20(s0)
|
||||
80000280: fec42703 lw a4,-20(s0)
|
||||
80000284: fdc42783 lw a5,-36(s0)
|
||||
80000288: fcf766e3 bltu a4,a5,80000254 <createWarps+0x38>
|
||||
8000028c: 000f0113 mv sp,t5
|
||||
80000290: fd442603 lw a2,-44(s0)
|
||||
80000294: fc842783 lw a5,-56(s0)
|
||||
80000298: fcc42703 lw a4,-52(s0)
|
||||
8000029c: fd042683 lw a3,-48(s0)
|
||||
800002a0: 00000593 li a1,0
|
||||
800002a4: fd842503 lw a0,-40(s0)
|
||||
800002a8: e4dff0ef jal ra,800000f4 <createThreads>
|
||||
800002ac: 00000073 ecall
|
||||
800002b0: 00000013 nop
|
||||
800002b4: 03c12083 lw ra,60(sp)
|
||||
800002b8: 03812403 lw s0,56(sp)
|
||||
800002bc: 04010113 addi sp,sp,64
|
||||
800002c0: 00008067 ret
|
||||
80000210 <createWarps>:
|
||||
80000210: fc010113 addi sp,sp,-64
|
||||
80000214: 02112e23 sw ra,60(sp)
|
||||
80000218: 02812c23 sw s0,56(sp)
|
||||
8000021c: 04010413 addi s0,sp,64
|
||||
80000220: fca42e23 sw a0,-36(s0)
|
||||
80000224: fcb42c23 sw a1,-40(s0)
|
||||
80000228: fcc42a23 sw a2,-44(s0)
|
||||
8000022c: fcd42823 sw a3,-48(s0)
|
||||
80000230: fce42623 sw a4,-52(s0)
|
||||
80000234: fcf42423 sw a5,-56(s0)
|
||||
80000238: 00010f13 mv t5,sp
|
||||
8000023c: 00100793 li a5,1
|
||||
80000240: fef42623 sw a5,-20(s0)
|
||||
80000244: 0300006f j 80000274 <createWarps+0x64>
|
||||
80000248: 80010113 addi sp,sp,-2048
|
||||
8000024c: fc842783 lw a5,-56(s0)
|
||||
80000250: fcc42703 lw a4,-52(s0)
|
||||
80000254: fd042683 lw a3,-48(s0)
|
||||
80000258: fd442603 lw a2,-44(s0)
|
||||
8000025c: fec42583 lw a1,-20(s0)
|
||||
80000260: fd842503 lw a0,-40(s0)
|
||||
80000264: f49ff0ef jal ra,800001ac <wspawn>
|
||||
80000268: fec42783 lw a5,-20(s0)
|
||||
8000026c: 00178793 addi a5,a5,1
|
||||
80000270: fef42623 sw a5,-20(s0)
|
||||
80000274: fec42703 lw a4,-20(s0)
|
||||
80000278: fdc42783 lw a5,-36(s0)
|
||||
8000027c: fcf766e3 bltu a4,a5,80000248 <createWarps+0x38>
|
||||
80000280: 000f0113 mv sp,t5
|
||||
80000284: fd442603 lw a2,-44(s0)
|
||||
80000288: fc842783 lw a5,-56(s0)
|
||||
8000028c: fcc42703 lw a4,-52(s0)
|
||||
80000290: fd042683 lw a3,-48(s0)
|
||||
80000294: 00000593 li a1,0
|
||||
80000298: fd842503 lw a0,-40(s0)
|
||||
8000029c: e55ff0ef jal ra,800000f0 <createThreads>
|
||||
800002a0: 00000073 ecall
|
||||
800002a4: 00000013 nop
|
||||
800002a8: 03c12083 lw ra,60(sp)
|
||||
800002ac: 03812403 lw s0,56(sp)
|
||||
800002b0: 04010113 addi sp,sp,64
|
||||
800002b4: 00008067 ret
|
||||
|
||||
800002c4 <get_wid>:
|
||||
800002c4: ff010113 addi sp,sp,-16
|
||||
800002c8: 00812623 sw s0,12(sp)
|
||||
800002cc: 00912423 sw s1,8(sp)
|
||||
800002d0: 01010413 addi s0,sp,16
|
||||
800002d4: 00048793 mv a5,s1
|
||||
800002d8: 00078513 mv a0,a5
|
||||
800002dc: 00c12403 lw s0,12(sp)
|
||||
800002e0: 00812483 lw s1,8(sp)
|
||||
800002e4: 01010113 addi sp,sp,16
|
||||
800002e8: 00008067 ret
|
||||
800002b8 <get_wid>:
|
||||
800002b8: ff010113 addi sp,sp,-16
|
||||
800002bc: 00812623 sw s0,12(sp)
|
||||
800002c0: 00912423 sw s1,8(sp)
|
||||
800002c4: 01010413 addi s0,sp,16
|
||||
800002c8: 00048793 mv a5,s1
|
||||
800002cc: 00078513 mv a0,a5
|
||||
800002d0: 00c12403 lw s0,12(sp)
|
||||
800002d4: 00812483 lw s1,8(sp)
|
||||
800002d8: 01010113 addi sp,sp,16
|
||||
800002dc: 00008067 ret
|
||||
|
||||
800002ec <get_1st_arg>:
|
||||
800002ec: ff010113 addi sp,sp,-16
|
||||
800002f0: 00812623 sw s0,12(sp)
|
||||
800002f4: 01212423 sw s2,8(sp)
|
||||
800002f8: 01010413 addi s0,sp,16
|
||||
800002fc: 00090793 mv a5,s2
|
||||
80000300: 00078513 mv a0,a5
|
||||
80000304: 00c12403 lw s0,12(sp)
|
||||
80000308: 00812903 lw s2,8(sp)
|
||||
8000030c: 01010113 addi sp,sp,16
|
||||
80000310: 00008067 ret
|
||||
800002e0 <get_1st_arg>:
|
||||
800002e0: ff010113 addi sp,sp,-16
|
||||
800002e4: 00812623 sw s0,12(sp)
|
||||
800002e8: 01212423 sw s2,8(sp)
|
||||
800002ec: 01010413 addi s0,sp,16
|
||||
800002f0: 00090793 mv a5,s2
|
||||
800002f4: 00078513 mv a0,a5
|
||||
800002f8: 00c12403 lw s0,12(sp)
|
||||
800002fc: 00812903 lw s2,8(sp)
|
||||
80000300: 01010113 addi sp,sp,16
|
||||
80000304: 00008067 ret
|
||||
|
||||
80000314 <get_2nd_arg>:
|
||||
80000314: ff010113 addi sp,sp,-16
|
||||
80000318: 00812623 sw s0,12(sp)
|
||||
8000031c: 01312423 sw s3,8(sp)
|
||||
80000320: 01010413 addi s0,sp,16
|
||||
80000324: 00098793 mv a5,s3
|
||||
80000328: 00078513 mv a0,a5
|
||||
8000032c: 00c12403 lw s0,12(sp)
|
||||
80000330: 00812983 lw s3,8(sp)
|
||||
80000334: 01010113 addi sp,sp,16
|
||||
80000338: 00008067 ret
|
||||
80000308 <get_2nd_arg>:
|
||||
80000308: ff010113 addi sp,sp,-16
|
||||
8000030c: 00812623 sw s0,12(sp)
|
||||
80000310: 01312423 sw s3,8(sp)
|
||||
80000314: 01010413 addi s0,sp,16
|
||||
80000318: 00098793 mv a5,s3
|
||||
8000031c: 00078513 mv a0,a5
|
||||
80000320: 00c12403 lw s0,12(sp)
|
||||
80000324: 00812983 lw s3,8(sp)
|
||||
80000328: 01010113 addi sp,sp,16
|
||||
8000032c: 00008067 ret
|
||||
|
||||
8000033c <get_3rd_arg>:
|
||||
8000033c: ff010113 addi sp,sp,-16
|
||||
80000340: 00812623 sw s0,12(sp)
|
||||
80000344: 01412423 sw s4,8(sp)
|
||||
80000348: 01010413 addi s0,sp,16
|
||||
8000034c: 000a0793 mv a5,s4
|
||||
80000350: 00078513 mv a0,a5
|
||||
80000354: 00c12403 lw s0,12(sp)
|
||||
80000358: 00812a03 lw s4,8(sp)
|
||||
8000035c: 01010113 addi sp,sp,16
|
||||
80000360: 00008067 ret
|
||||
80000330 <get_3rd_arg>:
|
||||
80000330: ff010113 addi sp,sp,-16
|
||||
80000334: 00812623 sw s0,12(sp)
|
||||
80000338: 01412423 sw s4,8(sp)
|
||||
8000033c: 01010413 addi s0,sp,16
|
||||
80000340: 000a0793 mv a5,s4
|
||||
80000344: 00078513 mv a0,a5
|
||||
80000348: 00c12403 lw s0,12(sp)
|
||||
8000034c: 00812a03 lw s4,8(sp)
|
||||
80000350: 01010113 addi sp,sp,16
|
||||
80000354: 00008067 ret
|
||||
|
||||
80000364 <initiate_stack>:
|
||||
80000364: ff010113 addi sp,sp,-16
|
||||
80000368: 00812623 sw s0,12(sp)
|
||||
8000036c: 01010413 addi s0,sp,16
|
||||
80000370: 7ffff137 lui sp,0x7ffff
|
||||
80000374: 00000013 nop
|
||||
80000378: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
|
||||
8000037c: 01010113 addi sp,sp,16
|
||||
80000380: 00008067 ret
|
||||
80000358 <initiate_stack>:
|
||||
80000358: ff010113 addi sp,sp,-16
|
||||
8000035c: 00812623 sw s0,12(sp)
|
||||
80000360: 01010413 addi s0,sp,16
|
||||
80000364: 7ffff137 lui sp,0x7ffff
|
||||
80000368: 00000013 nop
|
||||
8000036c: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
|
||||
80000370: 01010113 addi sp,sp,16
|
||||
80000374: 00008067 ret
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
|
|
Binary file not shown.
|
@ -1,61 +1,60 @@
|
|||
:0200000480007A
|
||||
:10000000130101FF232611002324810013040101A1
|
||||
:10001000EF004035B707008193870700370700815D
|
||||
:10001000EF008034B707008193870700370700811E
|
||||
:1000200013070708B7060081938606043706008089
|
||||
:10003000130686059305800013052000EF00001EBF
|
||||
:10003000130686059305400013054000EF00401DA0
|
||||
:1000400093070000138507008320C100032481006B
|
||||
:100050001301010167800000130101FC232E11022E
|
||||
:10006000232C8102130401042326A4FCEF00802525
|
||||
:100070002326A4FEEF0080272324A4FEEF0080297E
|
||||
:100080002322A4FEEF00802B2320A4FE8327C4FE9E
|
||||
:10009000939737000327C4FCB307F700232EF4FC23
|
||||
:1000A0008327C4FD93972700032784FEB307F70037
|
||||
:1000B00083A607008327C4FD93972700032744FEE8
|
||||
:1000C000B307F70003A707008327C4FD9397270012
|
||||
:1000D000032604FEB307F6003387E60023A0E700FB
|
||||
:1000E000130000008320C1030324810313010104D2
|
||||
:1000F00067800000130101FB23268104232491045F
|
||||
:100100002322210523203105232E4103232CA10383
|
||||
:10011000232AB103130401052326A4FC2324B4FCE1
|
||||
:100120002322C4FC2320D4FC232EE4FA232CF4FA4B
|
||||
:10013000032904FC8329C4FB032A84FB832484FC55
|
||||
:10014000130F010093071000232EF4FC6F00000230
|
||||
:100150000325C4FD0323C4FD130101F06B5003000C
|
||||
:100160008327C4FD93871700232EF4FC0327C4FDC7
|
||||
:100170008327C4FCE36EF7FC13010F001305000096
|
||||
:10018000832F44FC832DC4FCEBE0BF01730000000F
|
||||
:10019000130000000324C1048324810403294104C3
|
||||
:1001A00083290104032AC103032D8103832D410305
|
||||
:1001B0001301010567800000130101FD2326810260
|
||||
:1001C000130401032326A4FE2324B4FE2322C4FE29
|
||||
:1001D0002320D4FE232EE4FC232CF4FC832384FD73
|
||||
:1001E000B70700809387470F138307000325C4FEDA
|
||||
:1001F000832584FE832744FE13860700832604FE9E
|
||||
:100200000327C4FD938703006B0003001300000065
|
||||
:100210000324C1021301010367800000130101FCE4
|
||||
:10022000232E1102232C810213040104232EA4FC8B
|
||||
:10023000232CB4FC232AC4FC2328D4FC2326E4FC6E
|
||||
:100240002324F4FC130F0100930710002326F4FE6F
|
||||
:100250006F00000313010180832784FC0327C4FC83
|
||||
:10026000832604FD032644FD8325C4FE032584FD67
|
||||
:10027000EFF09FF48327C4FE938717002326F4FE34
|
||||
:100280000327C4FE8327C4FDE366F7FC13010F00B8
|
||||
:10029000032644FD832784FC0327C4FC832604FD36
|
||||
:1002A00093050000032584FDEFF0DFE473000000F8
|
||||
:1002B000130000008320C103032481031301010400
|
||||
:1002C00067800000130101FF232681002324910091
|
||||
:1002D0001304010193870400138507000324C10060
|
||||
:1002E000832481001301010167800000130101FFD5
|
||||
:1002F000232681002324210113040101930709000F
|
||||
:10030000138507000324C1000329810013010101A3
|
||||
:1003100067800000130101FF23268100232431019F
|
||||
:100320001304010193870900138507000324C1000A
|
||||
:10033000832981001301010167800000130101FF7F
|
||||
:1003400023268100232441011304010193070A009D
|
||||
:10035000138507000324C100032A81001301010152
|
||||
:1003600067800000130101FF2326810013040101AF
|
||||
:1003700037F1FF7F130000000324C10013010101C6
|
||||
:040380006780000092
|
||||
:100050001301010167800000130101FD2326110235
|
||||
:100060002324810213040103232EA4FC232CB4FCBB
|
||||
:10007000EF0000272326A4FEEF0000292324A4FE7E
|
||||
:10008000EF00002B2322A4FE832784FD93972700F3
|
||||
:100090000327C4FDB307F7002320F4FE832704FEE3
|
||||
:1000A000939727000327C4FEB307F70083A6070032
|
||||
:1000B000832704FE93972700032784FEB307F700E6
|
||||
:1000C00003A70700832704FE93972700032644FE17
|
||||
:1000D000B307F6003387E60023A0E7001300000013
|
||||
:1000E0008320C10203248102130101036780000001
|
||||
:1000F000130101FB2326810423242105232231053A
|
||||
:1001000023204105232EA103232CB1031304010551
|
||||
:100110002326A4FC2324B4FC2322C4FC2320D4FCE7
|
||||
:10012000232EE4FA232CF4FA032904FC8329C4FBCC
|
||||
:10013000032A84FB832584FC130F0100930710001E
|
||||
:10014000232EF4FC6F0000020325C4FD0323C4FD2D
|
||||
:10015000130101F06B5003008327C4FD9387170040
|
||||
:10016000232EF4FC0327C4FD8327C4FCE36EF7FCB5
|
||||
:1001700013010F0013050000832F44FC832DC4FCE2
|
||||
:10018000EBE0BF0173000000130000000324C10472
|
||||
:100190000329810483294104032A0104032DC10397
|
||||
:1001A000832D81031301010567800000130101FD08
|
||||
:1001B00023268102130401032326A4FE2324B4FE74
|
||||
:1001C0002322C4FE2320D4FE232EE4FC232CF4FCA3
|
||||
:1001D000832384FDB70700809387070F13830700ED
|
||||
:1001E0000325C4FE832584FE832744FE138607006F
|
||||
:1001F000832604FE0327C4FD938703006B000300DE
|
||||
:10020000130000000324C1021301010367800000F2
|
||||
:10021000130101FC232E1102232C8102130401047B
|
||||
:10022000232EA4FC232CB4FC232AC4FC2328D4FCB6
|
||||
:100230002326E4FC2324F4FC130F01009307100091
|
||||
:100240002326F4FE6F00000313010180832784FC42
|
||||
:100250000327C4FC832604FD032644FD8325C4FE36
|
||||
:10026000032584FDEFF09FF48327C4FE93871700D6
|
||||
:100270002326F4FE0327C4FE8327C4FDE366F7FCB0
|
||||
:1002800013010F00032644FD832784FC0327C4FCCD
|
||||
:10029000832604FD93050000032584FDEFF05FE550
|
||||
:1002A00073000000130000008320C10303248103B6
|
||||
:1002B0001301010467800000130101FF2326810060
|
||||
:1002C0002324910013040101938704001385070080
|
||||
:1002D0000324C10083248100130101016780000011
|
||||
:1002E000130101FF232681002324210113040101AE
|
||||
:1002F00093070900138507000324C1000329810027
|
||||
:100300001301010167800000130101FF2326810012
|
||||
:100310002324310113040101938709001385070089
|
||||
:100320000324C100832981001301010167800000BB
|
||||
:10033000130101FF2326810023244101130401013D
|
||||
:1003400093070A00138507000324C100032A8100D4
|
||||
:100350001301010167800000130101FF23268100C2
|
||||
:100360001304010137F1FF7F130000000324C100D3
|
||||
:08037000130101016780000088
|
||||
:02000004810079
|
||||
:1000400001000000010000000600000000000000A8
|
||||
:100050000300000001000000010000000200000099
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex
|
|
@ -10,7 +10,7 @@ void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr, unsig
|
|||
register unsigned *xx asm("s2") = x_ptr;
|
||||
register unsigned *yy asm("s3") = y_ptr;
|
||||
register unsigned *zz asm("s4") = z_ptr;
|
||||
register unsigned wid_ asm("s1") = wid;
|
||||
register unsigned wid_ asm("a1") = wid;
|
||||
|
||||
asm __volatile__("addi t5, sp, 0");
|
||||
for (unsigned i = 1; i < num_threads; i++)
|
||||
|
@ -79,14 +79,6 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
unsigned get_wid()
|
||||
{
|
||||
register unsigned ret asm("s1");
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#define ECALL asm __volatile__(".word 0x00000073")
|
||||
|
||||
|
||||
#define FUNC void (func)(unsigned)
|
||||
#define FUNC void (func)(unsigned, unsigned)
|
||||
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *);
|
||||
|
||||
unsigned get_wid();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue