mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
vx_spawn_warps redesign using opencl's style scheduler
This commit is contained in:
parent
138db29310
commit
30d950ada2
35 changed files with 81204 additions and 81014 deletions
|
@ -1,5 +1,5 @@
|
|||
|
||||
/tmp/pocl_vortex_kernel-4d-86-3b-c3-37.elf: file format ELF32-riscv
|
||||
/tmp/pocl_vortex_kernel-41-fe-25-b2-35.elf: file format ELF32-riscv
|
||||
|
||||
|
||||
Disassembly of section .init:
|
||||
|
@ -1557,13 +1557,12 @@ Disassembly of section .strtab:
|
|||
3e: 5f 6b 65 72 <unknown>
|
||||
42: 6e 65 <unknown>
|
||||
44: 6c 2d <unknown>
|
||||
46: 35 38 <unknown>
|
||||
48: 2d 39 <unknown>
|
||||
4a: 31 2d <unknown>
|
||||
4c: 66 32 <unknown>
|
||||
4e: 2d 64 <unknown>
|
||||
50: 38 2d <unknown>
|
||||
52: 61 63 <unknown>
|
||||
46: 65 34 <unknown>
|
||||
48: 2d 38 <unknown>
|
||||
4a: 33 2d 65 39 <unknown>
|
||||
4e: 2d 33 <unknown>
|
||||
50: 36 2d <unknown>
|
||||
52: 65 62 <unknown>
|
||||
54: 2e 63 <unknown>
|
||||
56: 00 70 <unknown>
|
||||
58: 61 72 <unknown>
|
||||
|
|
Binary file not shown.
|
@ -6,443 +6,451 @@ Disassembly of section .init:
|
|||
|
||||
80000000 <_start>:
|
||||
80000000: 00000597 auipc a1,0x0
|
||||
80000004: 0dc58593 addi a1,a1,220 # 800000dc <vx_set_sp>
|
||||
80000008: 02602573 csrr a0,0x26
|
||||
80000004: 0e458593 addi a1,a1,228 # 800000e4 <vx_set_sp>
|
||||
80000008: fc102573 csrr a0,0xfc1
|
||||
8000000c: 00b5106b 0xb5106b
|
||||
80000010: 0cc000ef jal ra,800000dc <vx_set_sp>
|
||||
80000010: 0d4000ef jal ra,800000e4 <vx_set_sp>
|
||||
80000014: 00100513 li a0,1
|
||||
80000018: 0005006b 0x5006b
|
||||
8000001c: c2c18513 addi a0,gp,-980 # 80001994 <__BSS_END__>
|
||||
80000020: c2c18613 addi a2,gp,-980 # 80001994 <__BSS_END__>
|
||||
80000024: 40a60633 sub a2,a2,a0
|
||||
80000028: 00000593 li a1,0
|
||||
8000002c: 2a4000ef jal ra,800002d0 <memset>
|
||||
80000030: 00000517 auipc a0,0x0
|
||||
80000034: 1a850513 addi a0,a0,424 # 800001d8 <__libc_fini_array>
|
||||
80000038: 15c000ef jal ra,80000194 <atexit>
|
||||
8000003c: 1f8000ef jal ra,80000234 <__libc_init_array>
|
||||
80000040: 008000ef jal ra,80000048 <main>
|
||||
80000044: 1640006f j 800001a8 <exit>
|
||||
8000001c: 00002517 auipc a0,0x2
|
||||
80000020: 99050513 addi a0,a0,-1648 # 800019ac <__BSS_END__>
|
||||
80000024: 00002617 auipc a2,0x2
|
||||
80000028: 98860613 addi a2,a2,-1656 # 800019ac <__BSS_END__>
|
||||
8000002c: 40a60633 sub a2,a2,a0
|
||||
80000030: 00000593 li a1,0
|
||||
80000034: 2ac000ef jal ra,800002e0 <memset>
|
||||
80000038: 00000517 auipc a0,0x0
|
||||
8000003c: 1b050513 addi a0,a0,432 # 800001e8 <__libc_fini_array>
|
||||
80000040: 160000ef jal ra,800001a0 <atexit>
|
||||
80000044: 200000ef jal ra,80000244 <__libc_init_array>
|
||||
80000048: 008000ef jal ra,80000050 <main>
|
||||
8000004c: 1680006f j 800001b4 <exit>
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
80000048 <main>:
|
||||
80000048: ff010113 addi sp,sp,-16
|
||||
8000004c: 7ffff7b7 lui a5,0x7ffff
|
||||
80000050: 00812423 sw s0,8(sp)
|
||||
80000054: 0007a403 lw s0,0(a5) # 7ffff000 <_start-0x1000>
|
||||
80000058: 00912223 sw s1,4(sp)
|
||||
8000005c: 01212023 sw s2,0(sp)
|
||||
80000060: 0087a483 lw s1,8(a5)
|
||||
80000064: 0047a903 lw s2,4(a5)
|
||||
80000068: 00112623 sw ra,12(sp)
|
||||
8000006c: 0f8000ef jal ra,80000164 <vx_core_id>
|
||||
80000070: 02850533 mul a0,a0,s0
|
||||
80000074: 02040863 beqz s0,800000a4 <main+0x5c>
|
||||
80000078: 00a40733 add a4,s0,a0
|
||||
8000007c: 00271713 slli a4,a4,0x2
|
||||
80000080: 00251513 slli a0,a0,0x2
|
||||
80000084: 012507b3 add a5,a0,s2
|
||||
80000088: 01270733 add a4,a4,s2
|
||||
8000008c: 412485b3 sub a1,s1,s2
|
||||
80000090: 0007a603 lw a2,0(a5)
|
||||
80000094: 00f586b3 add a3,a1,a5
|
||||
80000098: 00478793 addi a5,a5,4
|
||||
8000009c: 00c6a023 sw a2,0(a3)
|
||||
800000a0: fef718e3 bne a4,a5,80000090 <main+0x48>
|
||||
800000a4: 00c12083 lw ra,12(sp)
|
||||
800000a8: 00812403 lw s0,8(sp)
|
||||
800000ac: 00412483 lw s1,4(sp)
|
||||
800000b0: 00012903 lw s2,0(sp)
|
||||
800000b4: 01010113 addi sp,sp,16
|
||||
800000b8: 00008067 ret
|
||||
80000050 <main>:
|
||||
80000050: ff010113 addi sp,sp,-16
|
||||
80000054: 7ffff7b7 lui a5,0x7ffff
|
||||
80000058: 00812423 sw s0,8(sp)
|
||||
8000005c: 0007a403 lw s0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
|
||||
80000060: 00912223 sw s1,4(sp)
|
||||
80000064: 01212023 sw s2,0(sp)
|
||||
80000068: 0087a483 lw s1,8(a5)
|
||||
8000006c: 0047a903 lw s2,4(a5)
|
||||
80000070: 00112623 sw ra,12(sp)
|
||||
80000074: 0fc000ef jal ra,80000170 <vx_core_id>
|
||||
80000078: 02850533 mul a0,a0,s0
|
||||
8000007c: 02040863 beqz s0,800000ac <main+0x5c>
|
||||
80000080: 00a40733 add a4,s0,a0
|
||||
80000084: 00271713 slli a4,a4,0x2
|
||||
80000088: 00251513 slli a0,a0,0x2
|
||||
8000008c: 012507b3 add a5,a0,s2
|
||||
80000090: 01270733 add a4,a4,s2
|
||||
80000094: 412485b3 sub a1,s1,s2
|
||||
80000098: 0007a603 lw a2,0(a5)
|
||||
8000009c: 00f586b3 add a3,a1,a5
|
||||
800000a0: 00478793 addi a5,a5,4
|
||||
800000a4: 00c6a023 sw a2,0(a3)
|
||||
800000a8: fef718e3 bne a4,a5,80000098 <main+0x48>
|
||||
800000ac: 00c12083 lw ra,12(sp)
|
||||
800000b0: 00812403 lw s0,8(sp)
|
||||
800000b4: 00412483 lw s1,4(sp)
|
||||
800000b8: 00012903 lw s2,0(sp)
|
||||
800000bc: 01010113 addi sp,sp,16
|
||||
800000c0: 00008067 ret
|
||||
|
||||
800000bc <register_fini>:
|
||||
800000bc: 00000793 li a5,0
|
||||
800000c0: 00078863 beqz a5,800000d0 <register_fini+0x14>
|
||||
800000c4: 80000537 lui a0,0x80000
|
||||
800000c8: 1d850513 addi a0,a0,472 # 800001d8 <__global_pointer$+0xffffe470>
|
||||
800000cc: 0c80006f j 80000194 <atexit>
|
||||
800000d0: 00008067 ret
|
||||
800000c4 <register_fini>:
|
||||
800000c4: 00000793 li a5,0
|
||||
800000c8: 00078863 beqz a5,800000d8 <register_fini+0x14>
|
||||
800000cc: 80000537 lui a0,0x80000
|
||||
800000d0: 1e850513 addi a0,a0,488 # 800001e8 <__stack_top+0x810001e8>
|
||||
800000d4: 0cc0006f j 800001a0 <atexit>
|
||||
800000d8: 00008067 ret
|
||||
|
||||
800000d4 <_exit>:
|
||||
800000d4: 00000513 li a0,0
|
||||
800000d8: 0005006b 0x5006b
|
||||
|
||||
800000dc <vx_set_sp>:
|
||||
800000dc: 02502573 csrr a0,0x25
|
||||
800000dc <_exit>:
|
||||
800000dc: 00000513 li a0,0
|
||||
800000e0: 0005006b 0x5006b
|
||||
800000e4: 00002197 auipc gp,0x2
|
||||
800000e8: c8418193 addi gp,gp,-892 # 80001d68 <__global_pointer$>
|
||||
800000ec: 022025f3 csrr a1,0x22
|
||||
800000f0: 00a59593 slli a1,a1,0xa
|
||||
800000f4: 02002673 csrr a2,0x20
|
||||
800000f8: 00261613 slli a2,a2,0x2
|
||||
800000fc: 6ffff137 lui sp,0x6ffff
|
||||
80000100: 40b10133 sub sp,sp,a1
|
||||
80000104: 00c10133 add sp,sp,a2
|
||||
80000108: 021026f3 csrr a3,0x21
|
||||
8000010c: 00068663 beqz a3,80000118 <RETURN>
|
||||
80000110: 00000513 li a0,0
|
||||
80000114: 0005006b 0x5006b
|
||||
|
||||
80000118 <RETURN>:
|
||||
80000118: 00008067 ret
|
||||
800000e4 <vx_set_sp>:
|
||||
800000e4: fc002573 csrr a0,0xfc0
|
||||
800000e8: 0005006b 0x5006b
|
||||
800000ec: 00002197 auipc gp,0x2
|
||||
800000f0: c9418193 addi gp,gp,-876 # 80001d80 <__global_pointer>
|
||||
800000f4: 7f000117 auipc sp,0x7f000
|
||||
800000f8: f0c10113 addi sp,sp,-244 # ff000000 <__stack_top>
|
||||
800000fc: 40000593 li a1,1024
|
||||
80000100: cc102673 csrr a2,0xcc1
|
||||
80000104: 02c585b3 mul a1,a1,a2
|
||||
80000108: 40b10133 sub sp,sp,a1
|
||||
8000010c: cc3026f3 csrr a3,0xcc3
|
||||
80000110: 00068663 beqz a3,8000011c <RETURN>
|
||||
80000114: 00000513 li a0,0
|
||||
80000118: 0005006b 0x5006b
|
||||
|
||||
8000011c <vx_wspawn>:
|
||||
8000011c: 00b5106b 0xb5106b
|
||||
80000120: 00008067 ret
|
||||
8000011c <RETURN>:
|
||||
8000011c: 00008067 ret
|
||||
|
||||
80000124 <vx_tmc>:
|
||||
80000124: 0005006b 0x5006b
|
||||
80000128: 00008067 ret
|
||||
80000120 <vx_wspawn>:
|
||||
80000120: 00b5106b 0xb5106b
|
||||
80000124: 00008067 ret
|
||||
|
||||
8000012c <vx_barrier>:
|
||||
8000012c: 00b5406b 0xb5406b
|
||||
80000130: 00008067 ret
|
||||
80000128 <vx_tmc>:
|
||||
80000128: 0005006b 0x5006b
|
||||
8000012c: 00008067 ret
|
||||
|
||||
80000134 <vx_split>:
|
||||
80000134: 0005206b 0x5206b
|
||||
80000138: 00008067 ret
|
||||
80000130 <vx_barrier>:
|
||||
80000130: 00b5406b 0xb5406b
|
||||
80000134: 00008067 ret
|
||||
|
||||
8000013c <vx_join>:
|
||||
8000013c: 0000306b 0x306b
|
||||
80000140: 00008067 ret
|
||||
80000138 <vx_split>:
|
||||
80000138: 0005206b 0x5206b
|
||||
8000013c: 00008067 ret
|
||||
|
||||
80000144 <vx_warp_id>:
|
||||
80000144: 02102573 csrr a0,0x21
|
||||
80000148: 00008067 ret
|
||||
80000140 <vx_join>:
|
||||
80000140: 0000306b 0x306b
|
||||
80000144: 00008067 ret
|
||||
|
||||
8000014c <vx_warp_gid>:
|
||||
8000014c: 02302573 csrr a0,0x23
|
||||
80000150: 00008067 ret
|
||||
80000148 <vx_warp_id>:
|
||||
80000148: cc302573 csrr a0,0xcc3
|
||||
8000014c: 00008067 ret
|
||||
|
||||
80000154 <vx_thread_id>:
|
||||
80000154: 02002573 csrr a0,0x20
|
||||
80000158: 00008067 ret
|
||||
80000150 <vx_warp_gid>:
|
||||
80000150: f1402573 csrr a0,mhartid
|
||||
80000154: 00008067 ret
|
||||
|
||||
8000015c <vx_thread_gid>:
|
||||
8000015c: 02202573 csrr a0,0x22
|
||||
80000160: 00008067 ret
|
||||
80000158 <vx_thread_id>:
|
||||
80000158: cc002573 csrr a0,0xcc0
|
||||
8000015c: 00008067 ret
|
||||
|
||||
80000164 <vx_core_id>:
|
||||
80000164: 02402573 csrr a0,0x24
|
||||
80000168: 00008067 ret
|
||||
80000160 <vx_thread_lid>:
|
||||
80000160: cc102573 csrr a0,0xcc1
|
||||
80000164: 00008067 ret
|
||||
|
||||
8000016c <vx_num_threads>:
|
||||
8000016c: 02502573 csrr a0,0x25
|
||||
80000170: 00008067 ret
|
||||
80000168 <vx_thread_gid>:
|
||||
80000168: cc202573 csrr a0,0xcc2
|
||||
8000016c: 00008067 ret
|
||||
|
||||
80000174 <vx_num_warps>:
|
||||
80000174: 02602573 csrr a0,0x26
|
||||
80000178: 00008067 ret
|
||||
80000170 <vx_core_id>:
|
||||
80000170: cc502573 csrr a0,0xcc5
|
||||
80000174: 00008067 ret
|
||||
|
||||
8000017c <vx_num_cores>:
|
||||
8000017c: 02702573 csrr a0,0x27
|
||||
80000180: 00008067 ret
|
||||
80000178 <vx_num_threads>:
|
||||
80000178: fc002573 csrr a0,0xfc0
|
||||
8000017c: 00008067 ret
|
||||
|
||||
80000184 <vx_num_cycles>:
|
||||
80000184: b0002573 csrr a0,mcycle
|
||||
80000188: 00008067 ret
|
||||
80000180 <vx_num_warps>:
|
||||
80000180: fc102573 csrr a0,0xfc1
|
||||
80000184: 00008067 ret
|
||||
|
||||
8000018c <vx_num_instrs>:
|
||||
8000018c: b0202573 csrr a0,minstret
|
||||
80000190: 00008067 ret
|
||||
80000188 <vx_num_cores>:
|
||||
80000188: fc202573 csrr a0,0xfc2
|
||||
8000018c: 00008067 ret
|
||||
|
||||
80000194 <atexit>:
|
||||
80000194: 00050593 mv a1,a0
|
||||
80000198: 00000693 li a3,0
|
||||
8000019c: 00000613 li a2,0
|
||||
800001a0: 00000513 li a0,0
|
||||
800001a4: 2080006f j 800003ac <__register_exitproc>
|
||||
80000190 <vx_num_cycles>:
|
||||
80000190: b0002573 csrr a0,mcycle
|
||||
80000194: 00008067 ret
|
||||
|
||||
800001a8 <exit>:
|
||||
800001a8: ff010113 addi sp,sp,-16 # 6fffeff0 <_start-0x10001010>
|
||||
800001ac: 00000593 li a1,0
|
||||
800001b0: 00812423 sw s0,8(sp)
|
||||
800001b4: 00112623 sw ra,12(sp)
|
||||
800001b8: 00050413 mv s0,a0
|
||||
800001bc: 288000ef jal ra,80000444 <__call_exitprocs>
|
||||
800001c0: c281a503 lw a0,-984(gp) # 80001990 <_global_impure_ptr>
|
||||
800001c4: 03c52783 lw a5,60(a0)
|
||||
800001c8: 00078463 beqz a5,800001d0 <exit+0x28>
|
||||
800001cc: 000780e7 jalr a5
|
||||
800001d0: 00040513 mv a0,s0
|
||||
800001d4: f01ff0ef jal ra,800000d4 <_exit>
|
||||
80000198 <vx_num_instrs>:
|
||||
80000198: b0202573 csrr a0,minstret
|
||||
8000019c: 00008067 ret
|
||||
|
||||
800001d8 <__libc_fini_array>:
|
||||
800001d8: ff010113 addi sp,sp,-16
|
||||
800001dc: 00812423 sw s0,8(sp)
|
||||
800001e0: 800017b7 lui a5,0x80001
|
||||
800001e4: 80001437 lui s0,0x80001
|
||||
800001e8: 56840413 addi s0,s0,1384 # 80001568 <__global_pointer$+0xfffff800>
|
||||
800001ec: 56878793 addi a5,a5,1384 # 80001568 <__global_pointer$+0xfffff800>
|
||||
800001f0: 408787b3 sub a5,a5,s0
|
||||
800001f4: 00912223 sw s1,4(sp)
|
||||
800001f8: 00112623 sw ra,12(sp)
|
||||
800001fc: 4027d493 srai s1,a5,0x2
|
||||
80000200: 02048063 beqz s1,80000220 <__libc_fini_array+0x48>
|
||||
80000204: ffc78793 addi a5,a5,-4
|
||||
80000208: 00878433 add s0,a5,s0
|
||||
8000020c: 00042783 lw a5,0(s0)
|
||||
80000210: fff48493 addi s1,s1,-1
|
||||
80000214: ffc40413 addi s0,s0,-4
|
||||
80000218: 000780e7 jalr a5
|
||||
8000021c: fe0498e3 bnez s1,8000020c <__libc_fini_array+0x34>
|
||||
80000220: 00c12083 lw ra,12(sp)
|
||||
80000224: 00812403 lw s0,8(sp)
|
||||
80000228: 00412483 lw s1,4(sp)
|
||||
8000022c: 01010113 addi sp,sp,16
|
||||
80000230: 00008067 ret
|
||||
800001a0 <atexit>:
|
||||
800001a0: 00050593 mv a1,a0
|
||||
800001a4: 00000693 li a3,0
|
||||
800001a8: 00000613 li a2,0
|
||||
800001ac: 00000513 li a0,0
|
||||
800001b0: 20c0006f j 800003bc <__register_exitproc>
|
||||
|
||||
80000234 <__libc_init_array>:
|
||||
80000234: ff010113 addi sp,sp,-16
|
||||
80000238: 00812423 sw s0,8(sp)
|
||||
8000023c: 01212023 sw s2,0(sp)
|
||||
80000240: 80001437 lui s0,0x80001
|
||||
80000244: 80001937 lui s2,0x80001
|
||||
80000248: 56440793 addi a5,s0,1380 # 80001564 <__global_pointer$+0xfffff7fc>
|
||||
8000024c: 56490913 addi s2,s2,1380 # 80001564 <__global_pointer$+0xfffff7fc>
|
||||
80000250: 40f90933 sub s2,s2,a5
|
||||
80000254: 00112623 sw ra,12(sp)
|
||||
80000258: 00912223 sw s1,4(sp)
|
||||
8000025c: 40295913 srai s2,s2,0x2
|
||||
80000260: 02090063 beqz s2,80000280 <__libc_init_array+0x4c>
|
||||
80000264: 56440413 addi s0,s0,1380
|
||||
80000268: 00000493 li s1,0
|
||||
8000026c: 00042783 lw a5,0(s0)
|
||||
80000270: 00148493 addi s1,s1,1
|
||||
80000274: 00440413 addi s0,s0,4
|
||||
80000278: 000780e7 jalr a5
|
||||
8000027c: fe9918e3 bne s2,s1,8000026c <__libc_init_array+0x38>
|
||||
80000280: 80001437 lui s0,0x80001
|
||||
80000284: 80001937 lui s2,0x80001
|
||||
80000288: 56440793 addi a5,s0,1380 # 80001564 <__global_pointer$+0xfffff7fc>
|
||||
8000028c: 56890913 addi s2,s2,1384 # 80001568 <__global_pointer$+0xfffff800>
|
||||
80000290: 40f90933 sub s2,s2,a5
|
||||
80000294: 40295913 srai s2,s2,0x2
|
||||
80000298: 02090063 beqz s2,800002b8 <__libc_init_array+0x84>
|
||||
8000029c: 56440413 addi s0,s0,1380
|
||||
800002a0: 00000493 li s1,0
|
||||
800002a4: 00042783 lw a5,0(s0)
|
||||
800002a8: 00148493 addi s1,s1,1
|
||||
800002ac: 00440413 addi s0,s0,4
|
||||
800002b0: 000780e7 jalr a5
|
||||
800002b4: fe9918e3 bne s2,s1,800002a4 <__libc_init_array+0x70>
|
||||
800002b8: 00c12083 lw ra,12(sp)
|
||||
800002bc: 00812403 lw s0,8(sp)
|
||||
800002c0: 00412483 lw s1,4(sp)
|
||||
800002c4: 00012903 lw s2,0(sp)
|
||||
800002c8: 01010113 addi sp,sp,16
|
||||
800002cc: 00008067 ret
|
||||
800001b4 <exit>:
|
||||
800001b4: ff010113 addi sp,sp,-16
|
||||
800001b8: 00000593 li a1,0
|
||||
800001bc: 00812423 sw s0,8(sp)
|
||||
800001c0: 00112623 sw ra,12(sp)
|
||||
800001c4: 00050413 mv s0,a0
|
||||
800001c8: 290000ef jal ra,80000458 <__call_exitprocs>
|
||||
800001cc: 800027b7 lui a5,0x80002
|
||||
800001d0: 9a87a503 lw a0,-1624(a5) # 800019a8 <__stack_top+0x810019a8>
|
||||
800001d4: 03c52783 lw a5,60(a0)
|
||||
800001d8: 00078463 beqz a5,800001e0 <exit+0x2c>
|
||||
800001dc: 000780e7 jalr a5
|
||||
800001e0: 00040513 mv a0,s0
|
||||
800001e4: ef9ff0ef jal ra,800000dc <_exit>
|
||||
|
||||
800002d0 <memset>:
|
||||
800002d0: 00f00313 li t1,15
|
||||
800002d4: 00050713 mv a4,a0
|
||||
800002d8: 02c37e63 bgeu t1,a2,80000314 <memset+0x44>
|
||||
800002dc: 00f77793 andi a5,a4,15
|
||||
800002e0: 0a079063 bnez a5,80000380 <memset+0xb0>
|
||||
800002e4: 08059263 bnez a1,80000368 <memset+0x98>
|
||||
800002e8: ff067693 andi a3,a2,-16
|
||||
800002ec: 00f67613 andi a2,a2,15
|
||||
800002f0: 00e686b3 add a3,a3,a4
|
||||
800002f4: 00b72023 sw a1,0(a4)
|
||||
800002f8: 00b72223 sw a1,4(a4)
|
||||
800002fc: 00b72423 sw a1,8(a4)
|
||||
80000300: 00b72623 sw a1,12(a4)
|
||||
80000304: 01070713 addi a4,a4,16
|
||||
80000308: fed766e3 bltu a4,a3,800002f4 <memset+0x24>
|
||||
8000030c: 00061463 bnez a2,80000314 <memset+0x44>
|
||||
80000310: 00008067 ret
|
||||
80000314: 40c306b3 sub a3,t1,a2
|
||||
80000318: 00269693 slli a3,a3,0x2
|
||||
8000031c: 00000297 auipc t0,0x0
|
||||
80000320: 005686b3 add a3,a3,t0
|
||||
80000324: 00c68067 jr 12(a3)
|
||||
80000328: 00b70723 sb a1,14(a4)
|
||||
8000032c: 00b706a3 sb a1,13(a4)
|
||||
80000330: 00b70623 sb a1,12(a4)
|
||||
80000334: 00b705a3 sb a1,11(a4)
|
||||
80000338: 00b70523 sb a1,10(a4)
|
||||
8000033c: 00b704a3 sb a1,9(a4)
|
||||
80000340: 00b70423 sb a1,8(a4)
|
||||
80000344: 00b703a3 sb a1,7(a4)
|
||||
80000348: 00b70323 sb a1,6(a4)
|
||||
8000034c: 00b702a3 sb a1,5(a4)
|
||||
80000350: 00b70223 sb a1,4(a4)
|
||||
80000354: 00b701a3 sb a1,3(a4)
|
||||
80000358: 00b70123 sb a1,2(a4)
|
||||
8000035c: 00b700a3 sb a1,1(a4)
|
||||
80000360: 00b70023 sb a1,0(a4)
|
||||
80000364: 00008067 ret
|
||||
80000368: 0ff5f593 andi a1,a1,255
|
||||
8000036c: 00859693 slli a3,a1,0x8
|
||||
80000370: 00d5e5b3 or a1,a1,a3
|
||||
80000374: 01059693 slli a3,a1,0x10
|
||||
80000378: 00d5e5b3 or a1,a1,a3
|
||||
8000037c: f6dff06f j 800002e8 <memset+0x18>
|
||||
80000380: 00279693 slli a3,a5,0x2
|
||||
80000384: 00000297 auipc t0,0x0
|
||||
80000388: 005686b3 add a3,a3,t0
|
||||
8000038c: 00008293 mv t0,ra
|
||||
80000390: fa0680e7 jalr -96(a3)
|
||||
80000394: 00028093 mv ra,t0
|
||||
80000398: ff078793 addi a5,a5,-16
|
||||
8000039c: 40f70733 sub a4,a4,a5
|
||||
800003a0: 00f60633 add a2,a2,a5
|
||||
800003a4: f6c378e3 bgeu t1,a2,80000314 <memset+0x44>
|
||||
800003a8: f3dff06f j 800002e4 <memset+0x14>
|
||||
800001e8 <__libc_fini_array>:
|
||||
800001e8: ff010113 addi sp,sp,-16
|
||||
800001ec: 00812423 sw s0,8(sp)
|
||||
800001f0: 800017b7 lui a5,0x80001
|
||||
800001f4: 80001437 lui s0,0x80001
|
||||
800001f8: 58040413 addi s0,s0,1408 # 80001580 <__stack_top+0x81001580>
|
||||
800001fc: 58078793 addi a5,a5,1408 # 80001580 <__stack_top+0x81001580>
|
||||
80000200: 408787b3 sub a5,a5,s0
|
||||
80000204: 00912223 sw s1,4(sp)
|
||||
80000208: 00112623 sw ra,12(sp)
|
||||
8000020c: 4027d493 srai s1,a5,0x2
|
||||
80000210: 02048063 beqz s1,80000230 <__libc_fini_array+0x48>
|
||||
80000214: ffc78793 addi a5,a5,-4
|
||||
80000218: 00878433 add s0,a5,s0
|
||||
8000021c: 00042783 lw a5,0(s0)
|
||||
80000220: fff48493 addi s1,s1,-1
|
||||
80000224: ffc40413 addi s0,s0,-4
|
||||
80000228: 000780e7 jalr a5
|
||||
8000022c: fe0498e3 bnez s1,8000021c <__libc_fini_array+0x34>
|
||||
80000230: 00c12083 lw ra,12(sp)
|
||||
80000234: 00812403 lw s0,8(sp)
|
||||
80000238: 00412483 lw s1,4(sp)
|
||||
8000023c: 01010113 addi sp,sp,16
|
||||
80000240: 00008067 ret
|
||||
|
||||
800003ac <__register_exitproc>:
|
||||
800003ac: c281a703 lw a4,-984(gp) # 80001990 <_global_impure_ptr>
|
||||
800003b0: 14872783 lw a5,328(a4)
|
||||
800003b4: 04078c63 beqz a5,8000040c <__register_exitproc+0x60>
|
||||
800003b8: 0047a703 lw a4,4(a5)
|
||||
800003bc: 01f00813 li a6,31
|
||||
800003c0: 06e84e63 blt a6,a4,8000043c <__register_exitproc+0x90>
|
||||
800003c4: 00271813 slli a6,a4,0x2
|
||||
800003c8: 02050663 beqz a0,800003f4 <__register_exitproc+0x48>
|
||||
800003cc: 01078333 add t1,a5,a6
|
||||
800003d0: 08c32423 sw a2,136(t1)
|
||||
800003d4: 1887a883 lw a7,392(a5)
|
||||
800003d8: 00100613 li a2,1
|
||||
800003dc: 00e61633 sll a2,a2,a4
|
||||
800003e0: 00c8e8b3 or a7,a7,a2
|
||||
800003e4: 1917a423 sw a7,392(a5)
|
||||
800003e8: 10d32423 sw a3,264(t1)
|
||||
800003ec: 00200693 li a3,2
|
||||
800003f0: 02d50463 beq a0,a3,80000418 <__register_exitproc+0x6c>
|
||||
800003f4: 00170713 addi a4,a4,1
|
||||
800003f8: 00e7a223 sw a4,4(a5)
|
||||
800003fc: 010787b3 add a5,a5,a6
|
||||
80000400: 00b7a423 sw a1,8(a5)
|
||||
80000404: 00000513 li a0,0
|
||||
80000408: 00008067 ret
|
||||
8000040c: 14c70793 addi a5,a4,332
|
||||
80000410: 14f72423 sw a5,328(a4)
|
||||
80000414: fa5ff06f j 800003b8 <__register_exitproc+0xc>
|
||||
80000418: 18c7a683 lw a3,396(a5)
|
||||
8000041c: 00170713 addi a4,a4,1
|
||||
80000420: 00e7a223 sw a4,4(a5)
|
||||
80000424: 00c6e633 or a2,a3,a2
|
||||
80000428: 18c7a623 sw a2,396(a5)
|
||||
8000042c: 010787b3 add a5,a5,a6
|
||||
80000430: 00b7a423 sw a1,8(a5)
|
||||
80000434: 00000513 li a0,0
|
||||
80000438: 00008067 ret
|
||||
8000043c: fff00513 li a0,-1
|
||||
80000440: 00008067 ret
|
||||
80000244 <__libc_init_array>:
|
||||
80000244: ff010113 addi sp,sp,-16
|
||||
80000248: 00812423 sw s0,8(sp)
|
||||
8000024c: 01212023 sw s2,0(sp)
|
||||
80000250: 80001437 lui s0,0x80001
|
||||
80000254: 80001937 lui s2,0x80001
|
||||
80000258: 57c40793 addi a5,s0,1404 # 8000157c <__stack_top+0x8100157c>
|
||||
8000025c: 57c90913 addi s2,s2,1404 # 8000157c <__stack_top+0x8100157c>
|
||||
80000260: 40f90933 sub s2,s2,a5
|
||||
80000264: 00112623 sw ra,12(sp)
|
||||
80000268: 00912223 sw s1,4(sp)
|
||||
8000026c: 40295913 srai s2,s2,0x2
|
||||
80000270: 02090063 beqz s2,80000290 <__libc_init_array+0x4c>
|
||||
80000274: 57c40413 addi s0,s0,1404
|
||||
80000278: 00000493 li s1,0
|
||||
8000027c: 00042783 lw a5,0(s0)
|
||||
80000280: 00148493 addi s1,s1,1
|
||||
80000284: 00440413 addi s0,s0,4
|
||||
80000288: 000780e7 jalr a5
|
||||
8000028c: fe9918e3 bne s2,s1,8000027c <__libc_init_array+0x38>
|
||||
80000290: 80001437 lui s0,0x80001
|
||||
80000294: 80001937 lui s2,0x80001
|
||||
80000298: 57c40793 addi a5,s0,1404 # 8000157c <__stack_top+0x8100157c>
|
||||
8000029c: 58090913 addi s2,s2,1408 # 80001580 <__stack_top+0x81001580>
|
||||
800002a0: 40f90933 sub s2,s2,a5
|
||||
800002a4: 40295913 srai s2,s2,0x2
|
||||
800002a8: 02090063 beqz s2,800002c8 <__libc_init_array+0x84>
|
||||
800002ac: 57c40413 addi s0,s0,1404
|
||||
800002b0: 00000493 li s1,0
|
||||
800002b4: 00042783 lw a5,0(s0)
|
||||
800002b8: 00148493 addi s1,s1,1
|
||||
800002bc: 00440413 addi s0,s0,4
|
||||
800002c0: 000780e7 jalr a5
|
||||
800002c4: fe9918e3 bne s2,s1,800002b4 <__libc_init_array+0x70>
|
||||
800002c8: 00c12083 lw ra,12(sp)
|
||||
800002cc: 00812403 lw s0,8(sp)
|
||||
800002d0: 00412483 lw s1,4(sp)
|
||||
800002d4: 00012903 lw s2,0(sp)
|
||||
800002d8: 01010113 addi sp,sp,16
|
||||
800002dc: 00008067 ret
|
||||
|
||||
80000444 <__call_exitprocs>:
|
||||
80000444: fd010113 addi sp,sp,-48
|
||||
80000448: 01412c23 sw s4,24(sp)
|
||||
8000044c: c281aa03 lw s4,-984(gp) # 80001990 <_global_impure_ptr>
|
||||
80000450: 03212023 sw s2,32(sp)
|
||||
80000454: 02112623 sw ra,44(sp)
|
||||
80000458: 148a2903 lw s2,328(s4)
|
||||
8000045c: 02812423 sw s0,40(sp)
|
||||
80000460: 02912223 sw s1,36(sp)
|
||||
80000464: 01312e23 sw s3,28(sp)
|
||||
80000468: 01512a23 sw s5,20(sp)
|
||||
8000046c: 01612823 sw s6,16(sp)
|
||||
80000470: 01712623 sw s7,12(sp)
|
||||
80000474: 01812423 sw s8,8(sp)
|
||||
80000478: 04090063 beqz s2,800004b8 <__call_exitprocs+0x74>
|
||||
8000047c: 00050b13 mv s6,a0
|
||||
80000480: 00058b93 mv s7,a1
|
||||
80000484: 00100a93 li s5,1
|
||||
80000488: fff00993 li s3,-1
|
||||
8000048c: 00492483 lw s1,4(s2)
|
||||
80000490: fff48413 addi s0,s1,-1
|
||||
80000494: 02044263 bltz s0,800004b8 <__call_exitprocs+0x74>
|
||||
80000498: 00249493 slli s1,s1,0x2
|
||||
8000049c: 009904b3 add s1,s2,s1
|
||||
800004a0: 040b8463 beqz s7,800004e8 <__call_exitprocs+0xa4>
|
||||
800004a4: 1044a783 lw a5,260(s1)
|
||||
800004a8: 05778063 beq a5,s7,800004e8 <__call_exitprocs+0xa4>
|
||||
800004ac: fff40413 addi s0,s0,-1
|
||||
800004b0: ffc48493 addi s1,s1,-4
|
||||
800004b4: ff3416e3 bne s0,s3,800004a0 <__call_exitprocs+0x5c>
|
||||
800004b8: 02c12083 lw ra,44(sp)
|
||||
800004bc: 02812403 lw s0,40(sp)
|
||||
800004c0: 02412483 lw s1,36(sp)
|
||||
800004c4: 02012903 lw s2,32(sp)
|
||||
800004c8: 01c12983 lw s3,28(sp)
|
||||
800004cc: 01812a03 lw s4,24(sp)
|
||||
800004d0: 01412a83 lw s5,20(sp)
|
||||
800004d4: 01012b03 lw s6,16(sp)
|
||||
800004d8: 00c12b83 lw s7,12(sp)
|
||||
800004dc: 00812c03 lw s8,8(sp)
|
||||
800004e0: 03010113 addi sp,sp,48
|
||||
800004e4: 00008067 ret
|
||||
800004e8: 00492783 lw a5,4(s2)
|
||||
800004ec: 0044a683 lw a3,4(s1)
|
||||
800004f0: fff78793 addi a5,a5,-1
|
||||
800004f4: 04878e63 beq a5,s0,80000550 <__call_exitprocs+0x10c>
|
||||
800004f8: 0004a223 sw zero,4(s1)
|
||||
800004fc: fa0688e3 beqz a3,800004ac <__call_exitprocs+0x68>
|
||||
80000500: 18892783 lw a5,392(s2)
|
||||
80000504: 008a9733 sll a4,s5,s0
|
||||
80000508: 00492c03 lw s8,4(s2)
|
||||
8000050c: 00f777b3 and a5,a4,a5
|
||||
80000510: 02079263 bnez a5,80000534 <__call_exitprocs+0xf0>
|
||||
80000514: 000680e7 jalr a3
|
||||
80000518: 00492703 lw a4,4(s2)
|
||||
8000051c: 148a2783 lw a5,328(s4)
|
||||
80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4>
|
||||
80000524: f8f904e3 beq s2,a5,800004ac <__call_exitprocs+0x68>
|
||||
80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74>
|
||||
8000052c: 00078913 mv s2,a5
|
||||
80000530: f5dff06f j 8000048c <__call_exitprocs+0x48>
|
||||
80000534: 18c92783 lw a5,396(s2)
|
||||
80000538: 0844a583 lw a1,132(s1)
|
||||
8000053c: 00f77733 and a4,a4,a5
|
||||
80000540: 00071c63 bnez a4,80000558 <__call_exitprocs+0x114>
|
||||
80000544: 000b0513 mv a0,s6
|
||||
80000548: 000680e7 jalr a3
|
||||
8000054c: fcdff06f j 80000518 <__call_exitprocs+0xd4>
|
||||
80000550: 00892223 sw s0,4(s2)
|
||||
80000554: fa9ff06f j 800004fc <__call_exitprocs+0xb8>
|
||||
80000558: 00058513 mv a0,a1
|
||||
8000055c: 000680e7 jalr a3
|
||||
80000560: fb9ff06f j 80000518 <__call_exitprocs+0xd4>
|
||||
800002e0 <memset>:
|
||||
800002e0: 00f00313 li t1,15
|
||||
800002e4: 00050713 mv a4,a0
|
||||
800002e8: 02c37e63 bgeu t1,a2,80000324 <memset+0x44>
|
||||
800002ec: 00f77793 andi a5,a4,15
|
||||
800002f0: 0a079063 bnez a5,80000390 <memset+0xb0>
|
||||
800002f4: 08059263 bnez a1,80000378 <memset+0x98>
|
||||
800002f8: ff067693 andi a3,a2,-16
|
||||
800002fc: 00f67613 andi a2,a2,15
|
||||
80000300: 00e686b3 add a3,a3,a4
|
||||
80000304: 00b72023 sw a1,0(a4)
|
||||
80000308: 00b72223 sw a1,4(a4)
|
||||
8000030c: 00b72423 sw a1,8(a4)
|
||||
80000310: 00b72623 sw a1,12(a4)
|
||||
80000314: 01070713 addi a4,a4,16
|
||||
80000318: fed766e3 bltu a4,a3,80000304 <memset+0x24>
|
||||
8000031c: 00061463 bnez a2,80000324 <memset+0x44>
|
||||
80000320: 00008067 ret
|
||||
80000324: 40c306b3 sub a3,t1,a2
|
||||
80000328: 00269693 slli a3,a3,0x2
|
||||
8000032c: 00000297 auipc t0,0x0
|
||||
80000330: 005686b3 add a3,a3,t0
|
||||
80000334: 00c68067 jr 12(a3)
|
||||
80000338: 00b70723 sb a1,14(a4)
|
||||
8000033c: 00b706a3 sb a1,13(a4)
|
||||
80000340: 00b70623 sb a1,12(a4)
|
||||
80000344: 00b705a3 sb a1,11(a4)
|
||||
80000348: 00b70523 sb a1,10(a4)
|
||||
8000034c: 00b704a3 sb a1,9(a4)
|
||||
80000350: 00b70423 sb a1,8(a4)
|
||||
80000354: 00b703a3 sb a1,7(a4)
|
||||
80000358: 00b70323 sb a1,6(a4)
|
||||
8000035c: 00b702a3 sb a1,5(a4)
|
||||
80000360: 00b70223 sb a1,4(a4)
|
||||
80000364: 00b701a3 sb a1,3(a4)
|
||||
80000368: 00b70123 sb a1,2(a4)
|
||||
8000036c: 00b700a3 sb a1,1(a4)
|
||||
80000370: 00b70023 sb a1,0(a4)
|
||||
80000374: 00008067 ret
|
||||
80000378: 0ff5f593 andi a1,a1,255
|
||||
8000037c: 00859693 slli a3,a1,0x8
|
||||
80000380: 00d5e5b3 or a1,a1,a3
|
||||
80000384: 01059693 slli a3,a1,0x10
|
||||
80000388: 00d5e5b3 or a1,a1,a3
|
||||
8000038c: f6dff06f j 800002f8 <memset+0x18>
|
||||
80000390: 00279693 slli a3,a5,0x2
|
||||
80000394: 00000297 auipc t0,0x0
|
||||
80000398: 005686b3 add a3,a3,t0
|
||||
8000039c: 00008293 mv t0,ra
|
||||
800003a0: fa0680e7 jalr -96(a3)
|
||||
800003a4: 00028093 mv ra,t0
|
||||
800003a8: ff078793 addi a5,a5,-16
|
||||
800003ac: 40f70733 sub a4,a4,a5
|
||||
800003b0: 00f60633 add a2,a2,a5
|
||||
800003b4: f6c378e3 bgeu t1,a2,80000324 <memset+0x44>
|
||||
800003b8: f3dff06f j 800002f4 <memset+0x14>
|
||||
|
||||
800003bc <__register_exitproc>:
|
||||
800003bc: 800027b7 lui a5,0x80002
|
||||
800003c0: 9a87a703 lw a4,-1624(a5) # 800019a8 <__stack_top+0x810019a8>
|
||||
800003c4: 14872783 lw a5,328(a4)
|
||||
800003c8: 04078c63 beqz a5,80000420 <__register_exitproc+0x64>
|
||||
800003cc: 0047a703 lw a4,4(a5)
|
||||
800003d0: 01f00813 li a6,31
|
||||
800003d4: 06e84e63 blt a6,a4,80000450 <__register_exitproc+0x94>
|
||||
800003d8: 00271813 slli a6,a4,0x2
|
||||
800003dc: 02050663 beqz a0,80000408 <__register_exitproc+0x4c>
|
||||
800003e0: 01078333 add t1,a5,a6
|
||||
800003e4: 08c32423 sw a2,136(t1)
|
||||
800003e8: 1887a883 lw a7,392(a5)
|
||||
800003ec: 00100613 li a2,1
|
||||
800003f0: 00e61633 sll a2,a2,a4
|
||||
800003f4: 00c8e8b3 or a7,a7,a2
|
||||
800003f8: 1917a423 sw a7,392(a5)
|
||||
800003fc: 10d32423 sw a3,264(t1)
|
||||
80000400: 00200693 li a3,2
|
||||
80000404: 02d50463 beq a0,a3,8000042c <__register_exitproc+0x70>
|
||||
80000408: 00170713 addi a4,a4,1
|
||||
8000040c: 00e7a223 sw a4,4(a5)
|
||||
80000410: 010787b3 add a5,a5,a6
|
||||
80000414: 00b7a423 sw a1,8(a5)
|
||||
80000418: 00000513 li a0,0
|
||||
8000041c: 00008067 ret
|
||||
80000420: 14c70793 addi a5,a4,332
|
||||
80000424: 14f72423 sw a5,328(a4)
|
||||
80000428: fa5ff06f j 800003cc <__register_exitproc+0x10>
|
||||
8000042c: 18c7a683 lw a3,396(a5)
|
||||
80000430: 00170713 addi a4,a4,1
|
||||
80000434: 00e7a223 sw a4,4(a5)
|
||||
80000438: 00c6e633 or a2,a3,a2
|
||||
8000043c: 18c7a623 sw a2,396(a5)
|
||||
80000440: 010787b3 add a5,a5,a6
|
||||
80000444: 00b7a423 sw a1,8(a5)
|
||||
80000448: 00000513 li a0,0
|
||||
8000044c: 00008067 ret
|
||||
80000450: fff00513 li a0,-1
|
||||
80000454: 00008067 ret
|
||||
|
||||
80000458 <__call_exitprocs>:
|
||||
80000458: fd010113 addi sp,sp,-48
|
||||
8000045c: 800027b7 lui a5,0x80002
|
||||
80000460: 01412c23 sw s4,24(sp)
|
||||
80000464: 9a87aa03 lw s4,-1624(a5) # 800019a8 <__stack_top+0x810019a8>
|
||||
80000468: 03212023 sw s2,32(sp)
|
||||
8000046c: 02112623 sw ra,44(sp)
|
||||
80000470: 148a2903 lw s2,328(s4)
|
||||
80000474: 02812423 sw s0,40(sp)
|
||||
80000478: 02912223 sw s1,36(sp)
|
||||
8000047c: 01312e23 sw s3,28(sp)
|
||||
80000480: 01512a23 sw s5,20(sp)
|
||||
80000484: 01612823 sw s6,16(sp)
|
||||
80000488: 01712623 sw s7,12(sp)
|
||||
8000048c: 01812423 sw s8,8(sp)
|
||||
80000490: 04090063 beqz s2,800004d0 <__call_exitprocs+0x78>
|
||||
80000494: 00050b13 mv s6,a0
|
||||
80000498: 00058b93 mv s7,a1
|
||||
8000049c: 00100a93 li s5,1
|
||||
800004a0: fff00993 li s3,-1
|
||||
800004a4: 00492483 lw s1,4(s2)
|
||||
800004a8: fff48413 addi s0,s1,-1
|
||||
800004ac: 02044263 bltz s0,800004d0 <__call_exitprocs+0x78>
|
||||
800004b0: 00249493 slli s1,s1,0x2
|
||||
800004b4: 009904b3 add s1,s2,s1
|
||||
800004b8: 040b8463 beqz s7,80000500 <__call_exitprocs+0xa8>
|
||||
800004bc: 1044a783 lw a5,260(s1)
|
||||
800004c0: 05778063 beq a5,s7,80000500 <__call_exitprocs+0xa8>
|
||||
800004c4: fff40413 addi s0,s0,-1
|
||||
800004c8: ffc48493 addi s1,s1,-4
|
||||
800004cc: ff3416e3 bne s0,s3,800004b8 <__call_exitprocs+0x60>
|
||||
800004d0: 02c12083 lw ra,44(sp)
|
||||
800004d4: 02812403 lw s0,40(sp)
|
||||
800004d8: 02412483 lw s1,36(sp)
|
||||
800004dc: 02012903 lw s2,32(sp)
|
||||
800004e0: 01c12983 lw s3,28(sp)
|
||||
800004e4: 01812a03 lw s4,24(sp)
|
||||
800004e8: 01412a83 lw s5,20(sp)
|
||||
800004ec: 01012b03 lw s6,16(sp)
|
||||
800004f0: 00c12b83 lw s7,12(sp)
|
||||
800004f4: 00812c03 lw s8,8(sp)
|
||||
800004f8: 03010113 addi sp,sp,48
|
||||
800004fc: 00008067 ret
|
||||
80000500: 00492783 lw a5,4(s2)
|
||||
80000504: 0044a683 lw a3,4(s1)
|
||||
80000508: fff78793 addi a5,a5,-1
|
||||
8000050c: 04878e63 beq a5,s0,80000568 <__call_exitprocs+0x110>
|
||||
80000510: 0004a223 sw zero,4(s1)
|
||||
80000514: fa0688e3 beqz a3,800004c4 <__call_exitprocs+0x6c>
|
||||
80000518: 18892783 lw a5,392(s2)
|
||||
8000051c: 008a9733 sll a4,s5,s0
|
||||
80000520: 00492c03 lw s8,4(s2)
|
||||
80000524: 00f777b3 and a5,a4,a5
|
||||
80000528: 02079263 bnez a5,8000054c <__call_exitprocs+0xf4>
|
||||
8000052c: 000680e7 jalr a3
|
||||
80000530: 00492703 lw a4,4(s2)
|
||||
80000534: 148a2783 lw a5,328(s4)
|
||||
80000538: 01871463 bne a4,s8,80000540 <__call_exitprocs+0xe8>
|
||||
8000053c: f8f904e3 beq s2,a5,800004c4 <__call_exitprocs+0x6c>
|
||||
80000540: f80788e3 beqz a5,800004d0 <__call_exitprocs+0x78>
|
||||
80000544: 00078913 mv s2,a5
|
||||
80000548: f5dff06f j 800004a4 <__call_exitprocs+0x4c>
|
||||
8000054c: 18c92783 lw a5,396(s2)
|
||||
80000550: 0844a583 lw a1,132(s1)
|
||||
80000554: 00f77733 and a4,a4,a5
|
||||
80000558: 00071c63 bnez a4,80000570 <__call_exitprocs+0x118>
|
||||
8000055c: 000b0513 mv a0,s6
|
||||
80000560: 000680e7 jalr a3
|
||||
80000564: fcdff06f j 80000530 <__call_exitprocs+0xd8>
|
||||
80000568: 00892223 sw s0,4(s2)
|
||||
8000056c: fa9ff06f j 80000514 <__call_exitprocs+0xbc>
|
||||
80000570: 00058513 mv a0,a1
|
||||
80000574: 000680e7 jalr a3
|
||||
80000578: fb9ff06f j 80000530 <__call_exitprocs+0xd8>
|
||||
|
||||
Disassembly of section .init_array:
|
||||
|
||||
80001564 <__init_array_start>:
|
||||
80001564: 00bc addi a5,sp,72
|
||||
80001566: 8000 0x8000
|
||||
8000157c <__init_array_start>:
|
||||
8000157c: 00c4 addi s1,sp,68
|
||||
8000157e: 8000 0x8000
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
80001568 <impure_data>:
|
||||
80001568: 0000 unimp
|
||||
8000156a: 0000 unimp
|
||||
8000156c: 1854 addi a3,sp,52
|
||||
8000156e: 8000 0x8000
|
||||
80001570: 18bc addi a5,sp,120
|
||||
80001572: 8000 0x8000
|
||||
80001574: 1924 addi s1,sp,184
|
||||
80001576: 8000 0x8000
|
||||
80001580 <impure_data>:
|
||||
80001580: 0000 unimp
|
||||
80001582: 0000 unimp
|
||||
80001584: 186c addi a1,sp,60
|
||||
80001586: 8000 0x8000
|
||||
80001588: 18d4 addi a3,sp,116
|
||||
8000158a: 8000 0x8000
|
||||
8000158c: 193c addi a5,sp,184
|
||||
8000158e: 8000 0x8000
|
||||
...
|
||||
80001610: 0001 nop
|
||||
80001612: 0000 unimp
|
||||
80001614: 0000 unimp
|
||||
80001616: 0000 unimp
|
||||
80001618: 330e fld ft6,224(sp)
|
||||
8000161a: abcd j 80001c0c <__BSS_END__+0x278>
|
||||
8000161c: 1234 addi a3,sp,296
|
||||
8000161e: e66d bnez a2,80001708 <impure_data+0x1a0>
|
||||
80001620: deec sw a1,124(a3)
|
||||
80001622: 0005 c.nop 1
|
||||
80001624: 0000000b 0xb
|
||||
80001628: 0001 nop
|
||||
8000162a: 0000 unimp
|
||||
8000162c: 0000 unimp
|
||||
8000162e: 0000 unimp
|
||||
80001630: 330e fld ft6,224(sp)
|
||||
80001632: abcd j 80001c24 <__BSS_END__+0x278>
|
||||
80001634: 1234 addi a3,sp,296
|
||||
80001636: e66d bnez a2,80001720 <impure_data+0x1a0>
|
||||
80001638: deec sw a1,124(a3)
|
||||
8000163a: 0005 c.nop 1
|
||||
8000163c: 0000000b 0xb
|
||||
...
|
||||
|
||||
Disassembly of section .sdata:
|
||||
|
||||
80001990 <_global_impure_ptr>:
|
||||
80001990: 1568 addi a0,sp,684
|
||||
80001992: 8000 0x8000
|
||||
800019a8 <_global_impure_ptr>:
|
||||
800019a8: 1580 addi s0,sp,736
|
||||
800019aa: 8000 0x8000
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
|
@ -458,11 +466,11 @@ Disassembly of section .comment:
|
|||
Disassembly of section .riscv.attributes:
|
||||
|
||||
00000000 <.riscv.attributes>:
|
||||
0: 2541 jal 680 <_start-0x7ffff980>
|
||||
0: 2541 jal 680 <__stack_size+0x280>
|
||||
2: 0000 unimp
|
||||
4: 7200 flw fs0,32(a2)
|
||||
6: 7369 lui t1,0xffffa
|
||||
8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec>
|
||||
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
|
||||
c: 0000001b 0x1b
|
||||
10: 1004 addi s1,sp,32
|
||||
12: 7205 lui tp,0xfffe1
|
||||
|
@ -470,6 +478,6 @@ Disassembly of section .riscv.attributes:
|
|||
16: 6932 flw fs2,12(sp)
|
||||
18: 7032 flw ft0,44(sp)
|
||||
1a: 5f30 lw a2,120(a4)
|
||||
1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e>
|
||||
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
|
||||
1e: 3070 fld fa2,224(s0)
|
||||
20: 665f 7032 0030 0x307032665f
|
||||
|
|
Binary file not shown.
|
@ -4,7 +4,8 @@
|
|||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
struct kernel_arg_t {
|
||||
uint32_t count;
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
|
|
|
@ -117,7 +117,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_points = count * max_cores * max_warps * max_threads;
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
uint32_t num_points = count * num_tasks;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
|
@ -137,7 +138,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.count = count;
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
|
|
Binary file not shown.
|
@ -3,14 +3,14 @@
|
|||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(void* arg) {
|
||||
void kernel_body(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
|
||||
|
@ -19,7 +19,5 @@ void kernel_body(void* arg) {
|
|||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
vx_spawn_warps(num_warps, num_threads, kernel_body, arg);
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -5,7 +5,8 @@
|
|||
|
||||
struct kernel_arg_t {
|
||||
uint32_t testid;
|
||||
uint32_t count;
|
||||
uint32_t num_tasks;
|
||||
uint32_t task_size;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
|
|
|
@ -173,7 +173,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
int num_points = count * max_cores * max_warps * max_threads;
|
||||
int num_tasks = max_cores * max_warps * max_threads;
|
||||
int num_points = count * num_tasks;
|
||||
size_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
|
@ -193,7 +194,8 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.count = count;
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
|
||||
|
|
Binary file not shown.
|
@ -4,15 +4,15 @@
|
|||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
typedef void (*PFN_Kernel)(void* arg);
|
||||
typedef void (*PFN_Kernel)(int task_id, void* arg);
|
||||
|
||||
void kernel_iadd(void* arg) {
|
||||
void kernel_iadd(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -22,13 +22,13 @@ void kernel_iadd(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_imul(void* arg) {
|
||||
void kernel_imul(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -38,13 +38,13 @@ void kernel_imul(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_idiv(void* arg) {
|
||||
void kernel_idiv(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -54,13 +54,13 @@ void kernel_idiv(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_idiv_mul(void* arg) {
|
||||
void kernel_idiv_mul(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -72,13 +72,13 @@ void kernel_idiv_mul(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fadd(void* arg) {
|
||||
void kernel_fadd(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -88,13 +88,13 @@ void kernel_fadd(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fsub(void* arg) {
|
||||
void kernel_fsub(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -104,13 +104,13 @@ void kernel_fsub(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fmul(void* arg) {
|
||||
void kernel_fmul(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -120,13 +120,13 @@ void kernel_fmul(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fmadd(void* arg) {
|
||||
void kernel_fmadd(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -136,13 +136,13 @@ void kernel_fmadd(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fmsub(void* arg) {
|
||||
void kernel_fmsub(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -152,13 +152,13 @@ void kernel_fmsub(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fnmadd(void* arg) {
|
||||
void kernel_fnmadd(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -168,13 +168,13 @@ void kernel_fnmadd(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fnmsub(void* arg) {
|
||||
void kernel_fnmsub(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -184,13 +184,13 @@ void kernel_fnmsub(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fnmadd_madd(void* arg) {
|
||||
void kernel_fnmadd_madd(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -202,13 +202,13 @@ void kernel_fnmadd_madd(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fdiv(void* arg) {
|
||||
void kernel_fdiv(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -218,13 +218,13 @@ void kernel_fdiv(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fdiv2(void* arg) {
|
||||
void kernel_fdiv2(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -236,13 +236,13 @@ void kernel_fdiv2(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_fsqrt(void* arg) {
|
||||
void kernel_fsqrt(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -252,13 +252,13 @@ void kernel_fsqrt(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_ftoi(void* arg) {
|
||||
void kernel_ftoi(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -269,13 +269,13 @@ void kernel_ftoi(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_ftou(void* arg) {
|
||||
void kernel_ftou(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
|
@ -286,13 +286,13 @@ void kernel_ftou(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_itof(void* arg) {
|
||||
void kernel_itof(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -303,13 +303,13 @@ void kernel_itof(void* arg) {
|
|||
}
|
||||
}
|
||||
|
||||
void kernel_utof(void* arg) {
|
||||
void kernel_utof(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
uint32_t count = _arg->task_size;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
|
@ -344,7 +344,5 @@ static const PFN_Kernel sc_tests[] = {
|
|||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
vx_spawn_warps(num_warps, num_threads, sc_tests[arg->testid], arg);
|
||||
vx_spawn_tasks(arg->num_tasks, sc_tests[arg->testid], arg);
|
||||
}
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -327,14 +327,9 @@
|
|||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// Size of cache block in bytes
|
||||
`ifndef SM_BLOCK_SIZE
|
||||
`define SM_BLOCK_SIZE 1024
|
||||
`endif
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef SMEM_SIZE
|
||||
`define SMEM_SIZE (`NUM_WARPS * `NUM_THREADS * `SM_BLOCK_SIZE)
|
||||
`define SMEM_SIZE (1024 * `NUM_WARPS * `NUM_THREADS)
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
|
|
|
@ -109,9 +109,9 @@ module VX_csr_data #(
|
|||
`CSR_FRM : read_data_r = 32'(csr_frm[read_wid]);
|
||||
`CSR_FCSR : read_data_r = 32'(csr_fcsr[read_wid]);
|
||||
|
||||
`CSR_LWID : read_data_r = 32'(read_wid);
|
||||
`CSR_WTID ,
|
||||
`CSR_WTID ,
|
||||
`CSR_LTID ,
|
||||
`CSR_LWID : read_data_r = 32'(read_wid);
|
||||
`CSR_GTID ,
|
||||
/*`CSR_MHARTID ,*/
|
||||
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
|
||||
|
|
|
@ -319,7 +319,7 @@
|
|||
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
|
||||
// Block size in bytes
|
||||
`define SBANK_LINE_SIZE `SM_BLOCK_SIZE
|
||||
`define SBANK_LINE_SIZE 4
|
||||
|
||||
// Word size in bytes
|
||||
`define SWORD_SIZE 4
|
||||
|
|
|
@ -8,9 +8,9 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void (*func_t)(void *);
|
||||
typedef void (*pfn_callback)(int task_id, void *arg);
|
||||
|
||||
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args);
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback , void * args);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -8,59 +8,103 @@ extern "C" {
|
|||
|
||||
#define NUM_CORES_MAX 16
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
typedef struct {
|
||||
func_t function;
|
||||
void * arguments;
|
||||
int nthreads;
|
||||
} spawn_t;
|
||||
pfn_callback callback;
|
||||
void * args;
|
||||
int offset;
|
||||
int N;
|
||||
int R;
|
||||
} wspawn_args_t;
|
||||
|
||||
spawn_t* g_spawn[NUM_CORES_MAX];
|
||||
wspawn_args_t* g_wspawn_args[NUM_CORES_MAX];
|
||||
|
||||
void spawn_warp_all() {
|
||||
// active all threads
|
||||
int num_threads = vx_num_threads();
|
||||
vx_tmc(num_threads);
|
||||
void spawn_tasks_callback() {
|
||||
vx_tmc(vx_num_threads());
|
||||
|
||||
int core_id = vx_core_id();
|
||||
spawn_t* p_spawn = g_spawn[core_id];
|
||||
int core_id = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
int tid = vx_thread_id();
|
||||
int NT = vx_num_threads();
|
||||
|
||||
wspawn_args_t* p_wspawn_args = g_wspawn_args[core_id];
|
||||
|
||||
// call user routine
|
||||
p_spawn->function(p_spawn->arguments);
|
||||
int wK = (p_wspawn_args->N * wid) + MIN(p_wspawn_args->R, wid);
|
||||
int tK = p_wspawn_args->N + (wid < p_wspawn_args->R);
|
||||
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
|
||||
|
||||
// resume single-warp execution on exit
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
for (int task_id = offset, N = task_id + tK; task_id < N; ++task_id) {
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->args);
|
||||
}
|
||||
|
||||
vx_tmc(0 == wid);
|
||||
}
|
||||
|
||||
void spawn_warp_threads(int num_threads) {
|
||||
// active all threads
|
||||
vx_tmc(num_threads);
|
||||
void spawn_remaining_tasks_callback(int nthreads) {
|
||||
vx_tmc(nthreads);
|
||||
|
||||
int core_id = vx_core_id();
|
||||
spawn_t* p_spawn = g_spawn[core_id];
|
||||
int core_id = vx_core_id();
|
||||
int tid = vx_thread_gid();
|
||||
|
||||
// call user routine
|
||||
p_spawn->function(p_spawn->arguments);
|
||||
wspawn_args_t* p_wspawn_args = g_wspawn_args[core_id];
|
||||
|
||||
// resume single-warp execution on exit
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
int task_id = p_wspawn_args->offset + tid;
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->args);
|
||||
|
||||
vx_tmc(1);
|
||||
}
|
||||
|
||||
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) {
|
||||
int core_id = vx_core_id();
|
||||
if (core_id >= NUM_CORES_MAX)
|
||||
return;
|
||||
|
||||
spawn_t spawn = { func_ptr, args, num_threads };
|
||||
g_spawn[core_id] = &spawn;
|
||||
void vx_spawn_tasks(int num_tasks, pfn_callback callback , void * args) {
|
||||
// device specs
|
||||
int NC = vx_num_cores();
|
||||
int NW = vx_num_warps();
|
||||
int NT = vx_num_threads();
|
||||
|
||||
if (num_warps > 1) {
|
||||
vx_wspawn(num_warps, (unsigned)spawn_warp_all);
|
||||
}
|
||||
spawn_warp_threads(num_threads);
|
||||
// current core id
|
||||
int core_id = vx_core_id();
|
||||
if (core_id >= NUM_CORES_MAX)
|
||||
return;
|
||||
|
||||
// calculate necessary active cores
|
||||
int WT = NW * NT;
|
||||
int nC = (num_tasks > WT) ? (num_tasks / WT) : 1;
|
||||
int nc = MIN(nC, NC);
|
||||
if (core_id >= nc)
|
||||
return; // terminate unused cores
|
||||
|
||||
// number of tasks per core
|
||||
int tasks_per_core = num_tasks / nc;
|
||||
int tasks_per_core0 = tasks_per_core;
|
||||
if (core_id == (NC-1)) {
|
||||
int QC_r = num_tasks - (nc * tasks_per_core0);
|
||||
tasks_per_core0 += QC_r; // last core executes remaining tasks
|
||||
}
|
||||
|
||||
// number of tasks per warp
|
||||
int nW = tasks_per_core0 / NT; // total warps per core
|
||||
int rT = tasks_per_core0 - (nW * NT); // remaining threads
|
||||
int fW = (nW >= NW) ? (nW / NW) : 0; // full warps iterations
|
||||
int rW = (fW != 0) ? (nW - fW * NW) : 0; // remaining warps
|
||||
if (0 == fW)
|
||||
fW = 1;
|
||||
|
||||
//--
|
||||
wspawn_args_t wspawn_args = { callback, args, core_id * tasks_per_core, fW, rW };
|
||||
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
//--
|
||||
if (nW > 1) {
|
||||
int nw = MIN(nW, NW);
|
||||
vx_wspawn(nw, (unsigned)&spawn_tasks_callback);
|
||||
spawn_tasks_callback();
|
||||
}
|
||||
|
||||
//--
|
||||
if (rT != 0) {
|
||||
wspawn_args.offset = tasks_per_core0 - rT;
|
||||
spawn_remaining_tasks_callback(rT);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -33,21 +33,10 @@ unsigned z[] = {0, 0, 0, 0,
|
|||
0, 0, 0, 0,
|
||||
0, 0, 0, 0};
|
||||
|
||||
void mat_add_kernel(void * void_arguments)
|
||||
void mat_add_kernel(int task_id, void * void_arguments)
|
||||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
__if (valid)
|
||||
{
|
||||
unsigned index = (wid * arguments->numColums) + tid;
|
||||
arguments->z[index] = arguments->x[index] + arguments->y[index];
|
||||
}
|
||||
__endif
|
||||
arguments->z[task_id] = arguments->x[task_id] + arguments->y[task_id];
|
||||
}
|
||||
|
||||
void vx_print_mat(unsigned * matPtr, int numRows, int numCols)
|
||||
|
@ -62,15 +51,11 @@ void vx_print_mat(unsigned * matPtr, int numRows, int numCols)
|
|||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Main is called with all threads active of warp 0
|
||||
vx_tmc(1);
|
||||
|
||||
int main() {
|
||||
// void * hellp = malloc(4);
|
||||
vx_printf("Confirm Dev Main\n");
|
||||
|
||||
vx_printf("vx_spawn_warps\n");
|
||||
vx_printf("vx_spawn_tasks\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
|
@ -79,12 +64,8 @@ int main()
|
|||
arguments.numColums = 4;
|
||||
arguments.numRows = 4;
|
||||
|
||||
|
||||
int numWarps = 4;
|
||||
int numThreads = 4;
|
||||
|
||||
// First kernel call
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_tasks(arguments.numRows * arguments.numColums, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
|
||||
|
@ -95,8 +76,9 @@ int main()
|
|||
arguments.numRows = 4;
|
||||
|
||||
// Second Kernel Call
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_tasks(arguments.numRows * arguments.numColums, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
vx_prints("Passed!\n");
|
||||
|
||||
return 0;
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -68,7 +68,7 @@ Disassembly of section .text:
|
|||
800000c0: 7f000117 auipc sp,0x7f000
|
||||
800000c4: f4010113 addi sp,sp,-192 # ff000000 <__stack_top>
|
||||
800000c8: 40000593 li a1,1024
|
||||
800000cc: cc202673 csrr a2,0xcc2
|
||||
800000cc: cc102673 csrr a2,0xcc1
|
||||
800000d0: 02c585b3 mul a1,a1,a2
|
||||
800000d4: 40b10133 sub sp,sp,a1
|
||||
800000d8: cc3026f3 csrr a3,0xcc3
|
||||
|
|
Binary file not shown.
|
@ -11,7 +11,7 @@
|
|||
:1000900093070000638807003705008013054513A8
|
||||
:1000A0006F00C00467800000130500006B000500AE
|
||||
:1000B000732500FC6B0005009761010093810175B9
|
||||
:1000C0001701007F130101F493050040732620CC33
|
||||
:1000C0001701007F130101F493050040732610CC43
|
||||
:1000D000B385C5023301B140F32630CC63860600F8
|
||||
:1000E000130500006B000500678000009305050004
|
||||
:1000F0009306000013060000130500006F30400156
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -28,21 +28,9 @@ unsigned z[] = {0, 0, 0, 0,
|
|||
0, 0, 0, 0,
|
||||
0, 0, 0, 0};
|
||||
|
||||
void mat_add_kernel(void * void_arguments) {
|
||||
void mat_add_kernel(int task_id, void * void_arguments) {
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
// __if (valid)
|
||||
// {
|
||||
unsigned index = (wid * arguments->numColums) + tid;
|
||||
unsigned val = arguments->x[index] + arguments->y[index];
|
||||
arguments->z[index] = val;
|
||||
// }
|
||||
// __endif
|
||||
arguments->z[task_id] = arguments->x[task_id] + arguments->y[task_id];
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
@ -98,7 +86,7 @@ int main() {
|
|||
ptr++;
|
||||
}
|
||||
|
||||
vx_printf("vx_spawn_warps mat_add_kernel\n");
|
||||
vx_printf("vx_spawn_tasks mat_add_kernel\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
|
@ -107,16 +95,13 @@ int main() {
|
|||
arguments.numColums = 4;
|
||||
arguments.numRows = 4;
|
||||
|
||||
int numWarps = 4;
|
||||
int numThreads = 4;
|
||||
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_tasks(arguments.numRows * arguments.numColums, mat_add_kernel, &arguments);
|
||||
|
||||
vx_printf("Waiting to ensure other warps are done... (Takes a while)\n");
|
||||
for (int i = 0; i < 5000; i++) {}
|
||||
|
||||
for (int i = 0; i < numWarps; i++) {
|
||||
for (int j = 0; j < numThreads; j++) {
|
||||
for (int i = 0; i < arguments.numRows; i++) {
|
||||
for (int j = 0; j < arguments.numColums; j++) {
|
||||
unsigned index = (i * arguments.numColums) + j;
|
||||
vx_printf("0x%x ", z[index]);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue