mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
adding support for TLS global variables
This commit is contained in:
parent
77002dd06a
commit
da834a28df
4 changed files with 93 additions and 37 deletions
|
@ -89,12 +89,22 @@ SECTIONS
|
|||
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata :
|
||||
.tdata :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tdata_start = .);
|
||||
*(.tdata .tdata.* .gnu.linkonce.td.*)
|
||||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
|
|
|
@ -89,12 +89,22 @@ SECTIONS
|
|||
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata :
|
||||
.tdata :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tdata_start = .);
|
||||
*(.tdata .tdata.* .gnu.linkonce.td.*)
|
||||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
|
|
|
@ -4,32 +4,44 @@
|
|||
.global _start
|
||||
.type _start, @function
|
||||
_start:
|
||||
|
||||
# execute stack initialization on all warps
|
||||
la a1, vx_set_sp
|
||||
csrr a0, CSR_NW # get num warps
|
||||
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
|
||||
jal vx_set_sp
|
||||
|
||||
# initialize per-thread registers
|
||||
csrr a0, CSR_NW # get num warps
|
||||
la a1, init_regs
|
||||
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
|
||||
jal init_regs
|
||||
# return back to single thread execution
|
||||
li a0, 1
|
||||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
||||
# initialize TLS for all warps
|
||||
csrr a0, CSR_NW # get num warps
|
||||
la a1, __init_tls
|
||||
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
|
||||
call __init_tls
|
||||
# return back to single thread execution
|
||||
li a0, 1
|
||||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
||||
# Clear the bss segment
|
||||
# clear BSS segment
|
||||
la a0, _edata
|
||||
la a2, _end
|
||||
sub a2, a2, a0
|
||||
li a1, 0
|
||||
call memset
|
||||
|
||||
# Initialize trap vector
|
||||
# la t0, trap_entry
|
||||
# csrw mtvec, t0
|
||||
|
||||
# Register global termination functions
|
||||
la a0, __libc_fini_array
|
||||
la a0, __libc_fini_array
|
||||
|
||||
# to be called upon exit
|
||||
call atexit
|
||||
call atexit
|
||||
|
||||
# Run global initialization functions
|
||||
call __libc_init_array
|
||||
call __libc_init_array
|
||||
|
||||
# call main program routine
|
||||
call main
|
||||
|
@ -49,9 +61,9 @@ _exit:
|
|||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
||||
.section .text
|
||||
.type vx_set_sp, @function
|
||||
.global vx_set_sp
|
||||
vx_set_sp:
|
||||
.type init_regs, @function
|
||||
.global init_regs
|
||||
init_regs:
|
||||
# activate all threads
|
||||
li a0, -1
|
||||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
@ -66,12 +78,21 @@ vx_set_sp:
|
|||
# set stack pointer
|
||||
li sp, SMEM_BASE_ADDR # load stack base address
|
||||
#if SM_ENABLE
|
||||
csrr a2, CSR_LTID # get local thread id
|
||||
csrr a0, CSR_LTID # get local thread id
|
||||
#else
|
||||
csrr a2, CSR_GTID # get global thread id
|
||||
csrr a0, CSR_GTID # get global thread id
|
||||
#endif
|
||||
slli a1, a2, STACK_LOG2_SIZE
|
||||
sub sp, sp, a1 # sub thread block
|
||||
sll a1, a0, STACK_LOG2_SIZE
|
||||
sub sp, sp, a1
|
||||
|
||||
# set thread pointer register
|
||||
# use address space after BSS region
|
||||
# ensure cacheline alignment
|
||||
la a1, __tcb_aligned_size
|
||||
mul a0, a0, a1
|
||||
la tp, _end + 63
|
||||
add tp, tp, a0
|
||||
and tp, tp, -64
|
||||
|
||||
# disable active warps except warp0
|
||||
csrr a3, CSR_LWID # get local wid
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <unistd.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_print.h>
|
||||
#include <string.h>
|
||||
|
||||
/* newlib close() stub: no descriptor is ever closed; always reports failure. */
int _close(int file) {
  (void)file; /* argument is intentionally ignored */
  return -1;
}
|
||||
|
||||
|
@ -22,18 +23,36 @@ caddr_t _sbrk(int incr) {
|
|||
}
|
||||
|
||||
int _write(int file, char *ptr, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
vx_putchar(*ptr++);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
vx_putchar(*ptr++);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
int _kill(int pid, int sig) { return -1; }
|
||||
int _kill(int pid, int sig) { return -1; }
|
||||
|
||||
int _getpid() {
|
||||
return vx_warp_gid();
|
||||
}
|
||||
int _getpid() {
|
||||
return vx_warp_gid();
|
||||
}
|
||||
|
||||
/*
 * Initialize the thread-local storage block of every thread in the calling
 * warp.
 *
 * All threads of the warp are activated; each one then copies the .tdata
 * initialization image to the address held in its own `tp` register and
 * zeroes the .tbss part that follows it. On exit the thread mask is set to
 * (warp id == 0), i.e. 1 on warp 0 and 0 on every other warp.
 *
 * Expects `tp` to have been set up for each thread before this is called.
 */
void __init_tls(void) {
  /*
   * Linker-provided symbols. Sizes and offsets are carried by the symbol's
   * address, which is why they are declared as arrays and cast to size_t
   * instead of being dereferenced.
   */
  extern char __tdata_start[];  /* start of the .tdata initialization image */
  extern char __tbss_offset[];  /* __tbss_start - __tdata_start             */
  extern char __tdata_size[];   /* SIZEOF(.tdata)                           */
  extern char __tbss_size[];    /* SIZEOF(.tbss)                            */

  char *tls_base;

  // activate all threads
  vx_tmc(-1);

  // Read this thread's TLS base from tp with an explicit instruction.
  // An uninitialized local `register ... __asm__("tp")` variable is not a
  // supported way to read a hardware register (GCC only guarantees the
  // binding for Extended-asm operands; Clang does not honor it elsewhere).
  // The read must stay after the vx_tmc(-1) above so that every newly
  // activated thread executes it.
  // NOTE(review): ordering relies on vx_tmc being a volatile asm - confirm.
  __asm__ volatile ("mv %0, tp" : "=r"(tls_base));

  // TLS memory initialization
  memcpy(tls_base, __tdata_start, (size_t)__tdata_size);
  memset(tls_base + (size_t)__tbss_offset, 0, (size_t)__tbss_size);

  // back to single thread execution
  // (mask 1 on warp 0; mask 0 elsewhere, which presumably parks that warp)
  vx_tmc(0 == vx_warp_id());
}
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
|
||||
|
@ -48,9 +67,7 @@ extern void _init (void);
|
|||
#endif
|
||||
|
||||
/* Iterate over all the init routines. */
|
||||
void
|
||||
__libc_init_array (void)
|
||||
{
|
||||
void __libc_init_array (void) {
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
|
@ -77,9 +94,7 @@ extern void _fini (void);
|
|||
#endif
|
||||
|
||||
/* Run all the cleanup routines. */
|
||||
void
|
||||
__libc_fini_array (void)
|
||||
{
|
||||
void __libc_fini_array (void) {
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue