vec add successfull

This commit is contained in:
fares 2019-11-16 16:07:10 -05:00
parent 4049f314b0
commit ed5c628c3b
8 changed files with 512031 additions and 81372 deletions

View file

@ -1,7 +1,8 @@
COMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-g++
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostartfiles
# CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostartfiles
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,linker.ld -march=rv32im -mabi=ilp32
DMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy

View file

@ -11,28 +11,55 @@ ENTRY(_start)
SECTIONS
{
PROVIDE(__stack_top = 0x6ffff000);
. = 0x70000000;
.FileIO :
{
*(.FileIO)
}
. = 0x80000000;
.text :
{
*(.text)
*(.*)
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
*(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
*(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
*(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
.fini :
@ -42,14 +69,174 @@ SECTIONS
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
PROVIDE (_edata = .);
PROVIDE (_end = .);
PROVIDE (__global_pointer$ = .);
. = 0x81000000;
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
. = 0x82000000;
.comment : { *(.comment) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
{
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
}
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
__DATA_BEGIN__ = .;
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__SDATA_BEGIN__ = .;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we do not
pad the .data section. */
. = ALIGN(. != 0 ? 32 / 8 : 1);
}
. = ALIGN(32 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(32 / 8);
__BSS_END__ = .;
__global_pointer$ = MIN(__SDATA_BEGIN__ + 0x800,
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

View file

@ -0,0 +1,242 @@
/* ---- Original Script: /opt/riscv32i/riscv32-unknown-elf/lib/ldscripts/elf32lriscv.x ---- */
/* Default linker script, for normal executables */
/* Copyright (C) 2014-2017 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
"elf32-littleriscv")
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
PROVIDE(__stack_top = 0x6ffff000);
. = 0x80000000;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
*(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
*(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
*(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
{
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
}
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
__DATA_BEGIN__ = .;
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__SDATA_BEGIN__ = .;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we do not
pad the .data section. */
. = ALIGN(. != 0 ? 32 / 8 : 1);
}
. = ALIGN(32 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(32 / 8);
__BSS_END__ = .;
__global_pointer$ = MIN(__SDATA_BEGIN__ + 0x800,
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

View file

@ -59,16 +59,16 @@ extern "C" {
#endif
int _pocl_register_kernel(const char* name, const void* pfn, uint32_t num_args, uint32_t num_locals, const uint8_t* arg_types, const uint32_t* local_sizes) {
printf("******** _pocl_register_kernel\n");
printf("Name to register: %s\n", name);
printf("PTR of name: %x\n", name);
//printf("******** _pocl_register_kernel\n");
//printf("Name to register: %s\n", name);
//printf("PTR of name: %x\n", name);
if (g_num_kernels == MAX_KERNELS)
{
printf("ERROR: REACHED MAX KERNELS\n");
//printf("ERROR: REACHED MAX KERNELS\n");
return -1;
}
printf("Going to register at index: %d\n", g_num_kernels);
//printf("Going to register at index: %d\n", g_num_kernels);
kernel_info_t* kernel = g_kernels + g_num_kernels++;
kernel->name = name;
@ -77,23 +77,23 @@ int _pocl_register_kernel(const char* name, const void* pfn, uint32_t num_args,
kernel->num_locals = num_locals;
kernel->arg_types = arg_types;
kernel->local_sizes = local_sizes;
printf("New kernel name: %s\n", kernel->name);
//printf("New kernel name: %s\n", kernel->name);
return 0;
}
int _pocl_query_kernel(const char* name, const void** p_pfn, uint32_t* p_num_args, uint32_t* p_num_locals, const uint8_t** p_arg_types, const uint32_t** p_local_sizes) {
printf("********* Inside _pocl_query_kernel\n");
printf("name: %s\n", name);
printf("g_num_kernels: %d\n", g_num_kernels);
//printf("********* Inside _pocl_query_kernel\n");
//printf("name: %s\n", name);
//printf("g_num_kernels: %d\n", g_num_kernels);
for (int i = 0; i < g_num_kernels; ++i) {
printf("Currently quering index %d\n", i);
//printf("Currently quering index %d\n", i);
kernel_info_t* kernel = g_kernels + i;
if (strcmp(kernel->name, name) != 0)
{
printf("STR CMP failed! kernel->name = %s \t name: %s\n", kernel->name, name);
//printf("STR CMP failed! kernel->name = %s \t name: %s\n", kernel->name, name);
continue;
}
printf("!!!!!!!!!STR CMP PASSED\n");
//printf("!!!!!!!!!STR CMP PASSED\n");
if (p_pfn) *p_pfn = kernel->pfn;
if (p_num_args) *p_num_args = kernel->num_args;
if (p_num_locals) *p_num_locals = kernel->num_locals;
@ -138,8 +138,7 @@ void cleanup() {
int main (int argc, char **argv) {
vx_tmc(1);
vx_print_str("Hello from vecadd\n");
printf("New cleared vecadd running\n");
//printf("\n\n\n\nReal Vecadd running\n");
cl_platform_id platform_id;
cl_device_id device_id;
@ -149,18 +148,18 @@ int main (int argc, char **argv) {
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
printf("Got platform id: %x\n", platform_id);
//printf("Got platform id: %x\n", platform_id);
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
printf("Got platformID and deviceID\n");
//printf("Got platformID and deviceID\n");
// Creating context.
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
printf("Got context\n");
//printf("Got context\n");
// Memory buffers for each array
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE * sizeof(cl_int), NULL, &_err));
@ -168,14 +167,14 @@ int main (int argc, char **argv) {
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, SIZE * sizeof(cl_int), NULL, &_err));
printf("Created buffers\n");
//printf("Created buffers\n");
// Allocate memories for input arrays and output arrays.
A = (cl_int*)malloc(sizeof(cl_int)*SIZE);
B = (cl_int*)malloc(sizeof(cl_int)*SIZE);
C = (cl_int*)malloc(sizeof(cl_int)*SIZE);
printf("Allocated memory: A=%x\tB=%x\tC=%x\n", A, B, C);
//printf("Allocated memory: A=%x\tB=%x\tC=%x\n", A, B, C);
// Initialize values for array members.
for (i=0; i<SIZE; ++i) {
@ -183,12 +182,12 @@ int main (int argc, char **argv) {
B[i] = i*2+1;
}
printf("About to call clCreateProgramWithBuiltInKernels\n");
//printf("About to call clCreateProgramWithBuiltInKernels\n");
// Create program from kernel source
program = CL_CHECK2(clCreateProgramWithBuiltInKernels(context, 1, &device_id, KERNEL_NAME, &_err));
printf("Returned from clCreateProgramWithBuiltInKernels\n");
//printf("Returned from clCreateProgramWithBuiltInKernels\n");
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,66 +1,77 @@
.text
# .section .init, "ax"
# .global _start
# _start:
# .cfi_startproc
# .cfi_undefined ra
# .option push
# .option norelax
# la gp, __global_pointer$
# .option pop
# la sp, __stack_top
# add s0, sp, zero
# jal zero, main
# .cfi_endproc
# .end
.section .init, "ax"
.global _start
.type _start, @function
_start:
# Initialize GP
# Initialize SP
# la sp, __stack_top
la a1, vx_set_sp
li a0, 4
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
jal vx_set_sp
li a0, 1
.word 0x0005006b # tmc 4
# Initialize global pointer
.option push
.option norelax
1:auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
.option pop
# call __cxx_global_var_init
# Clear the bss segment
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
la a0, __libc_fini_array # Register global termination functions
call atexit # to be called upon exit
call __libc_init_array # Run global initialization functions
# lw a0, 0(sp) # a0 = argc
# addi a1, sp, __SIZEOF_POINTER__ # a1 = argv
# li a2, 0 # a2 = envp = NULL
.word 0x0005006b # tmc 1
# Initialize global pointer
.option push
.option norelax
1:auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
.option pop
# call __cxx_global_var_init
# Clear the bss segment
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
la a0, __libc_fini_array # Register global termination functions
call atexit # to be called upon exit
call __libc_init_array # Run global initialization functions
li a0, 4
.word 0x0005006b # tmc 4
call main
tail exit
.size _start, .-_start
.word 0x0005006b # tmc 4
call main
tail exit
.size _start, .-_start
.section .text
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
li a0, 4
.word 0x0005006b # tmc 4
li a0, 4
.word 0x0005006b # tmc 4
csrr a3, 0x21 # get wid
slli a3, a3, 0x1a # shift by wid
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shoft per warp
add sp, sp, a2 # shift sp for better performance
csrr a3, 0x21 # get wid
slli a3, a3, 0x1a # shift by wid
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shoft per warp
add sp, sp, a2 # shift sp for better performance
csrr a3, 0x21 # get wid
beqz a3, RETURN
li a0, 0
.word 0x0005006b # tmc 0
RETURN:
ret
csrr a3, 0x21 # get wid
beqz a3, RETURN
li a0, 0
.word 0x0005006b # tmc 0
RETURN:
ret