adding support for TLS global variables

This commit is contained in:
Blaise Tine 2022-07-14 06:03:02 -04:00
parent 77002dd06a
commit da834a28df
4 changed files with 93 additions and 37 deletions

View file

@ -89,12 +89,22 @@ SECTIONS
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
/* Thread-local data output section: gathers the .tdata, .tdata.* and
   .gnu.linkonce.td.* input sections. */
.tdata :
{
/* Hidden symbol at the location counter before the first input section. */
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
/* Hidden symbol at the location counter after the last input section. */
PROVIDE_HIDDEN (__tdata_end = .);
}
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
/* Size of the .tdata output section, published as a symbol value. */
PROVIDE (__tdata_size = SIZEOF (.tdata));
/* Thread-local BSS output section: gathers the .tbss, .tbss.*,
   .gnu.linkonce.tb.* and .tcommon input sections. */
.tbss :
{
PROVIDE_HIDDEN (__tbss_start = .);
/* Distance from __tdata_start to __tbss_start, wrapped in ABSOLUTE so the
   symbol carries a plain number instead of a section-relative address. */
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
/* Size of the .tbss output section, published as a symbol value. */
PROVIDE (__tbss_size = SIZEOF (.tbss));
/* Span from __tdata_start to __tbss_end, rounded up to a multiple of 64. */
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);

View file

@ -89,12 +89,22 @@ SECTIONS
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
/* Thread-local data output section: gathers the .tdata, .tdata.* and
   .gnu.linkonce.td.* input sections. */
.tdata :
{
/* Hidden symbol at the location counter before the first input section. */
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
/* Hidden symbol at the location counter after the last input section. */
PROVIDE_HIDDEN (__tdata_end = .);
}
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
/* Size of the .tdata output section, published as a symbol value. */
PROVIDE (__tdata_size = SIZEOF (.tdata));
/* Thread-local BSS output section: gathers the .tbss, .tbss.*,
   .gnu.linkonce.tb.* and .tcommon input sections. */
.tbss :
{
PROVIDE_HIDDEN (__tbss_start = .);
/* Distance from __tdata_start to __tbss_start, wrapped in ABSOLUTE so the
   symbol carries a plain number instead of a section-relative address. */
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
/* Size of the .tbss output section, published as a symbol value. */
PROVIDE (__tbss_size = SIZEOF (.tbss));
/* Span from __tdata_start to __tbss_end, rounded up to a multiple of 64. */
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);

View file

@ -4,32 +4,44 @@
.global _start
.type _start, @function
_start:
# execute stack initialization on all warps
la a1, vx_set_sp
csrr a0, CSR_NW # get num warps
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
jal vx_set_sp
# initialize per-thread registers
csrr a0, CSR_NW # get num warps
la a1, init_regs
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
jal init_regs
# return back to single thread execution
li a0, 1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
# initialize TLS for all warps
csrr a0, CSR_NW # get num warps
la a1, __init_tls
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
call __init_tls
# return back to single thread execution
li a0, 1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
# Clear the bss segment
# clear BSS segment
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
# Initialize trap vector
# a t0, trap_entry
# csrw mtvec, t0
# Register global termination functions
la a0, __libc_fini_array
la a0, __libc_fini_array
# to be called upon exit
call atexit
call atexit
# Run global initialization functions
call __libc_init_array
call __libc_init_array
# call main program routine
call main
@ -49,9 +61,9 @@ _exit:
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
.section .text
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
.type init_regs, @function
.global init_regs
init_regs:
# activate all threads
li a0, -1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
@ -66,12 +78,21 @@ vx_set_sp:
# set stack pointer
li sp, SMEM_BASE_ADDR # load stack base address
#if SM_ENABLE
csrr a2, CSR_LTID # get local thread id
csrr a0, CSR_LTID # get local thread id
#else
csrr a2, CSR_GTID # get global thread id
csrr a0, CSR_GTID # get global thread id
#endif
slli a1, a2, STACK_LOG2_SIZE
sub sp, sp, a1 # sub thread block
sll a1, a0, STACK_LOG2_SIZE
sub sp, sp, a1
# set thread pointer register
# use address space after BSS region
# ensure cacheline alignment
la a1, __tcb_aligned_size
mul a0, a0, a1
la tp, _end + 63
add tp, tp, a0
and tp, tp, -64
# disable active warps except warp0
csrr a3, CSR_LWID # get local wid

View file

@ -3,6 +3,7 @@
#include <unistd.h>
#include <vx_intrinsics.h>
#include <vx_print.h>
#include <string.h>
/* Stub: ignores `file` and always reports failure by returning -1. */
int _close(int file) {
  (void)file;
  return -1;
}
@ -22,18 +23,36 @@ caddr_t _sbrk(int incr) {
}
int _write(int file, char *ptr, int len) {
int i;
for (i = 0; i < len; ++i) {
vx_putchar(*ptr++);
}
return len;
}
int i;
for (i = 0; i < len; ++i) {
vx_putchar(*ptr++);
}
return len;
}
int _kill(int pid, int sig) { return -1; }
/* Stub: ignores both `pid` and `sig` and always reports failure by returning -1. */
int _kill(int pid, int sig) {
  (void)pid;
  (void)sig;
  return -1;
}
int _getpid() {
return vx_warp_gid();
}
/* Reports the process id as whatever vx_warp_gid() returns. */
int _getpid() {
  int pid = vx_warp_gid();
  return pid;
}
/*
 * Initialize the thread-local storage block addressed by the `tp` register.
 *
 * Copies the .tdata image (__tdata_size bytes starting at __tdata_start) to
 * the address held in `tp`, then zero-fills __tbss_size bytes starting at
 * tp + __tbss_offset. The work is bracketed by vx_tmc() calls: vx_tmc(-1)
 * before, and vx_tmc(1) on warp 0 / vx_tmc(0) on every other warp after.
 *
 * Assumes `tp` has already been set up by the caller's startup code.
 */
void __init_tls(void) {
  /*
   * Linker-provided symbols. Only their addresses are meaningful: the
   * sizes and the offset are encoded as symbol values, hence the casts
   * to size_t below instead of dereferences.
   */
  extern char __tdata_start[];
  extern char __tbss_offset[];
  extern char __tdata_size[];
  extern char __tbss_size[];

  // activate all threads
  vx_tmc(-1);

  // Read the thread pointer with an explicit move. A local
  // `register ... __asm__("tp")` variable that is read without being
  // written is outside what GCC supports for local register variables
  // (they are only guaranteed as Extended-asm operands). `volatile` keeps
  // this read ordered after the vx_tmc() above, assuming vx_tmc() is
  // itself a volatile asm -- TODO confirm in vx_intrinsics.h.
  char *tls_base;
  __asm__ volatile ("mv %0, tp" : "=r"(tls_base));

  // TLS memory initialization
  memcpy(tls_base, __tdata_start, (size_t)__tdata_size);
  memset(tls_base + (size_t)__tbss_offset, 0, (size_t)__tbss_size);

  // back to single thread execution: the argument is 1 on warp 0 and 0
  // on every other warp
  vx_tmc(0 == vx_warp_id());
}
#ifdef HAVE_INITFINI_ARRAY
@ -48,9 +67,7 @@ extern void _init (void);
#endif
/* Iterate over all the init routines. */
void
__libc_init_array (void)
{
void __libc_init_array (void) {
size_t count;
size_t i;
@ -77,9 +94,7 @@ extern void _fini (void);
#endif
/* Run all the cleanup routines. */
void
__libc_fini_array (void)
{
void __libc_fini_array (void) {
size_t count;
size_t i;