[13/23] nptl: Eliminate the __static_tls_size, __static_tls_align_m1 variables

Message ID 3a4fd2cf500c263176935ce1d6f78aea829207c0.1620838411.git.fweimer@redhat.com
State Superseded
Headers show
Series
  • nptl: Move almost all remaining functions into libc
Related show

Commit Message

Florian Weimer via Libc-alpha May 12, 2021, 4:58 p.m.
Use the __nptl_tls_static_size_for_stack inline function instead,
and the GLRO (dl_tls_static_align) value directly.

The computation of GLRO (dl_tls_static_align) in
_dl_determine_tlsoffset ensures that the alignment is at least
TLS_TCB_ALIGN, which is at least STACK_ALIGN (see allocate_stack).
Therefore, the additional rounding-up step is removed.

Also move the initialization of the default stack size from
__pthread_initialize_minimal_internal to __pthread_early_init.
This introduces an extra system call during single-threaded startup,
but this simplifies the initialization sequence.  No locking is
needed around the writes to __default_pthread_attr because the
process is single-threaded at this point.
---
 elf/dl-tls.c                      |  5 ++--
 nptl/allocatestack.c              | 25 +++++++++--------
 nptl/nptl-init.c                  | 46 ++-----------------------------
 nptl/nptl-stack.h                 | 11 +++++++-
 nptl/pthreadP.h                   |  4 ---
 sysdeps/nptl/pthread_early_init.h | 28 +++++++++++++++++++
 6 files changed, 58 insertions(+), 61 deletions(-)

-- 
2.31.1

Comments

Adhemerval Zanella via Libc-alpha May 14, 2021, 12:40 p.m. | #1
On 12/05/2021 13:58, Florian Weimer via Libc-alpha wrote:
> Use the  __nptl_tls_static_size_for_stack inline function instead,

> and the GLRO (dl_tls_static_align) value directly.

> 

> The computation of GLRO (dl_tls_static_align)  in

> _dl_determine_tlsoffset ensures that the alignment is at least

> TLS_TCB_ALIGN, which at least STACK_ALIGN (see allocate_stack).

> Therefore, the additional rounding-up step is removed.

> 

> ALso move the initialization of the default stack size from

> __pthread_initialize_minimal_internal to __pthread_early_init.

> This introduces an extra system call during single-threaded startup,

> but this simplifies the initialization sequence.  No locking is

> needed around the writes to __default_pthread_attr because the

> process is single-threaded at this point.


LGTM, with some comments below.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>


> ---

>  elf/dl-tls.c                      |  5 ++--

>  nptl/allocatestack.c              | 25 +++++++++--------

>  nptl/nptl-init.c                  | 46 ++-----------------------------

>  nptl/nptl-stack.h                 | 11 +++++++-

>  nptl/pthreadP.h                   |  4 ---

>  sysdeps/nptl/pthread_early_init.h | 28 +++++++++++++++++++

>  6 files changed, 58 insertions(+), 61 deletions(-)

> 

> diff --git a/elf/dl-tls.c b/elf/dl-tls.c

> index 91031c2b72..e531ec5913 100644

> --- a/elf/dl-tls.c

> +++ b/elf/dl-tls.c

> @@ -386,8 +386,9 @@ allocate_dtv (void *result)

>    return result;

>  }

>  

> -

> -/* Get size and alignment requirements of the static TLS block.  */

> +/* Get size and alignment requirements of the static TLS block.  This

> +   function is no longer used by glibc itself, but the GCC sanitizers

> +   use it despite the GLIBC_PRIVATE status.  */

>  void

>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)

>  {


Ok. I am not very fond of adding this as de facto ABI; maybe we should
properly export it outside GLIBC_PRIVATE, since binaries now do rely
on it (and the sanitizer API project seems to be stalled).

> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c

> index c0a5c4d96d..dc81a2ca73 100644

> --- a/nptl/allocatestack.c

> +++ b/nptl/allocatestack.c

> @@ -254,6 +254,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>    struct pthread *pd;

>    size_t size;

>    size_t pagesize_m1 = __getpagesize () - 1;

> +  size_t tls_static_size_for_stack = __nptl_tls_static_size_for_stack ();

> +  size_t tls_static_align_m1 = GLRO (dl_tls_static_align) - 1;

>  

>    assert (powerof2 (pagesize_m1 + 1));

>    assert (TCB_ALIGNMENT >= STACK_ALIGN);

> @@ -284,17 +286,18 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>        /* If the user also specified the size of the stack make sure it

>  	 is large enough.  */

>        if (attr->stacksize != 0

> -	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))

> +	  && attr->stacksize < (tls_static_size_for_stack

> +				+ MINIMAL_REST_STACK))

>  	return EINVAL;

>  

>        /* Adjust stack size for alignment of the TLS block.  */


Ok.

>  #if TLS_TCB_AT_TP

>        adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)

> -	    & __static_tls_align_m1;

> +	    & tls_static_align_m1;

>        assert (size > adj + TLS_TCB_SIZE);

>  #elif TLS_DTV_AT_TP

> -      adj = ((uintptr_t) stackaddr - __static_tls_size)

> -	    & __static_tls_align_m1;

> +      adj = ((uintptr_t) stackaddr - tls_static_size_for_stack)

> +	    & tls_static_align_m1;

>        assert (size > adj);

>  #endif

>  


Ok.

> @@ -307,7 +310,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>  			       - TLS_TCB_SIZE - adj);

>  #elif TLS_DTV_AT_TP

>        pd = (struct pthread *) (((uintptr_t) stackaddr

> -				- __static_tls_size - adj)

> +				- tls_static_size_for_stack - adj)

>  			       - TLS_PRE_TCB_SIZE);

>  #endif

>  

> @@ -366,7 +369,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>  			| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

>  

>        /* Adjust the stack size for alignment.  */

> -      size &= ~__static_tls_align_m1;

> +      size &= ~tls_static_align_m1;

>        assert (size != 0);

>  

>        /* Make sure the size of the stack is enough for the guard and


Ok.

> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>  	/* Arithmetic overflow.  */

>  	return EINVAL;

>        size += guardsize;

> -      if (__builtin_expect (size < ((guardsize + __static_tls_size

> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack

>  				     + MINIMAL_REST_STACK + pagesize_m1)

>  				    & ~pagesize_m1),

>  			    0))


Use __glibc_unlikely here.

> @@ -414,11 +417,11 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>  #if TLS_TCB_AT_TP

>  	  pd = (struct pthread *) ((((uintptr_t) mem + size)

>  				    - TLS_TCB_SIZE)

> -				   & ~__static_tls_align_m1);

> +				   & ~tls_static_align_m1);

>  #elif TLS_DTV_AT_TP

>  	  pd = (struct pthread *) ((((uintptr_t) mem + size

> -				    - __static_tls_size)

> -				    & ~__static_tls_align_m1)

> +				    - tls_static_size_for_stack)

> +				    & ~tls_static_align_m1)

>  				   - TLS_PRE_TCB_SIZE);

>  #endif

>  


Ok.

> @@ -602,7 +605,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>  

>  # if TLS_TCB_AT_TP

>    /* The stack begins before the TCB and the static TLS block.  */

> -  stacktop = ((char *) (pd + 1) - __static_tls_size);

> +  stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);

>  # elif TLS_DTV_AT_TP

>    stacktop = (char *) (pd - 1);

>  # endif


Ok.

> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c

> index bc4831ac89..732e580355 100644

> --- a/nptl/nptl-init.c

> +++ b/nptl/nptl-init.c

> @@ -36,10 +36,7 @@

>  #include <kernel-features.h>

>  #include <libc-pointer-arith.h>

>  #include <pthread_mutex_conf.h>

> -

> -/* Size and alignment of static TLS block.  */

> -size_t __static_tls_size;

> -size_t __static_tls_align_m1;

> +#include <nptl-stack.h>

>  

>  /* Version of the library, used in libthread_db to detect mismatches.  */

>  static const char nptl_version[] __attribute_used__ = VERSION;

> @@ -47,44 +44,6 @@ static const char nptl_version[] __attribute_used__ = VERSION;

>  void

>  __pthread_initialize_minimal_internal (void)

>  {

> -  /* Get the size of the static and alignment requirements for the TLS

> -     block.  */

> -  size_t static_tls_align;

> -  _dl_get_tls_static_info (&__static_tls_size, &static_tls_align);

> -

> -  /* Make sure the size takes all the alignments into account.  */

> -  if (STACK_ALIGN > static_tls_align)

> -    static_tls_align = STACK_ALIGN;

> -  __static_tls_align_m1 = static_tls_align - 1;

> -

> -  __static_tls_size = roundup (__static_tls_size, static_tls_align);

> -

> -  /* Determine the default allowed stack size.  This is the size used

> -     in case the user does not specify one.  */

> -  struct rlimit limit;

> -  if (__getrlimit (RLIMIT_STACK, &limit) != 0

> -      || limit.rlim_cur == RLIM_INFINITY)

> -    /* The system limit is not usable.  Use an architecture-specific

> -       default.  */

> -    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;

> -  else if (limit.rlim_cur < PTHREAD_STACK_MIN)

> -    /* The system limit is unusably small.

> -       Use the minimal size acceptable.  */

> -    limit.rlim_cur = PTHREAD_STACK_MIN;

> -

> -  /* Make sure it meets the minimum size that allocate_stack

> -     (allocatestack.c) will demand, which depends on the page size.  */

> -  const uintptr_t pagesz = GLRO(dl_pagesize);

> -  const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;

> -  if (limit.rlim_cur < minstack)

> -    limit.rlim_cur = minstack;

> -

> -  /* Round the resource limit up to page size.  */

> -  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);

> -  lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);

> -  __default_pthread_attr.internal.stacksize = limit.rlim_cur;

> -  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);

> -  lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);

>  }

>  strong_alias (__pthread_initialize_minimal_internal,

>  	      __pthread_initialize_minimal)


Do we still need the empty __pthread_initialize_minimal_internal function?

> @@ -101,5 +60,6 @@ strong_alias (__pthread_initialize_minimal_internal,

>  size_t

>  __pthread_get_minstack (const pthread_attr_t *attr)

>  {

> -  return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;

> +  return (GLRO(dl_pagesize) + __nptl_tls_static_size_for_stack ()

> +	  + PTHREAD_STACK_MIN);

>  }


Ok.

> diff --git a/nptl/nptl-stack.h b/nptl/nptl-stack.h

> index 8631b61816..a6bd8df77f 100644

> --- a/nptl/nptl-stack.h

> +++ b/nptl/nptl-stack.h

> @@ -20,7 +20,8 @@

>  #ifndef _NPTL_STACK_H

>  #define _NPTL_STACK_H

>  

> -#include <descr.h>

> +#include <nptl/descr.h>

> +#include <ldsodefs.h>

>  #include <list.h>

>  #include <stdbool.h>

>  

> @@ -47,4 +48,12 @@ libc_hidden_proto (__nptl_deallocate_stack)

>  /* Free stacks until cache size is lower than LIMIT.  */

>  void __nptl_free_stacks (size_t limit) attribute_hidden;

>  

> +/* Compute the size of the static TLS area based on data from the

> +   dynamic loader.  */

> +static inline size_t

> +__nptl_tls_static_size_for_stack (void)

> +{

> +  return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));

> +}

> +

>  #endif /* _NPTL_STACK_H */


Ok.

> diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h

> index e33b071a4d..9ee61af8b3 100644

> --- a/nptl/pthreadP.h

> +++ b/nptl/pthreadP.h

> @@ -205,10 +205,6 @@ libc_hidden_proto (__default_pthread_attr_lock)

>  /* Called from __libc_freeres to deallocate the default attribute.  */

>  extern void __default_pthread_attr_freeres (void) attribute_hidden;

>  

> -/* Size and alignment of static TLS block.  */

> -extern size_t __static_tls_size attribute_hidden;

> -extern size_t __static_tls_align_m1 attribute_hidden;

> -

>  /* Attribute handling.  */

>  extern struct pthread_attr *__attr_list attribute_hidden;

>  extern int __attr_list_lock attribute_hidden;


Ok.

> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h

> index 2d15303dd9..5b49ce39c2 100644

> --- a/sysdeps/nptl/pthread_early_init.h

> +++ b/sysdeps/nptl/pthread_early_init.h

> @@ -19,12 +19,40 @@

>  #ifndef _PTHREAD_EARLY_INIT_H

>  #define _PTHREAD_EARLY_INIT_H 1

>  

> +#include <nptl/nptl-stack.h>

>  #include <nptl/pthreadP.h>

>  #include <pthread_mutex_conf.h>

> +#include <sys/resource.h>

>  

>  static inline void

>  __pthread_early_init (void)

>  {

> +  /* Determine the default allowed stack size.  This is the size used

> +     in case the user does not specify one.  */

> +  struct rlimit limit;

> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0

> +      || limit.rlim_cur == RLIM_INFINITY)

> +    /* The system limit is not usable.  Use an architecture-specific

> +       default.  */

> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;

> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)

> +    /* The system limit is unusably small.

> +       Use the minimal size acceptable.  */

> +    limit.rlim_cur = PTHREAD_STACK_MIN;


Maybe we should move away from non-LFS calls inside glibc and use
__getrlimit64 here instead.

> +

> +  /* Make sure it meets the minimum size that allocate_stack

> +     (allocatestack.c) will demand, which depends on the page size.  */

> +  const uintptr_t pagesz = GLRO(dl_pagesize);

> +  const size_t minstack = (pagesz + __nptl_tls_static_size_for_stack ()

> +                           + MINIMAL_REST_STACK);

> +  if (limit.rlim_cur < minstack)

> +    limit.rlim_cur = minstack;

> +

> +  /* Round the resource limit up to page size.  */

> +  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);

> +  __default_pthread_attr.internal.stacksize = limit.rlim_cur;

> +  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);

> +

>  #if HAVE_TUNABLES

>    __pthread_tunables_init ();

>  #endif

> 


Ok.
Florian Weimer May 17, 2021, 8:14 a.m. | #2
* Adhemerval Zanella via Libc-alpha:

>> -/* Get size and alignment requirements of the static TLS block.  */

>> +/* Get size and alignment requirements of the static TLS block.  This

>> +   function is no longer used by glibc itself, but the GCC sanitizers

>> +   use it despite the GLIBC_PRIVATE status.  */

>>  void

>>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)

>>  {

>

> Ok. I am not very found of adding this as de facto ABI, maybe we

> proper export it outside GLIBC_PRIVATE since now binaries do rely

> on them (since the sanitizer API project seems to be stalled).


The API is not future-proof because I want to implement resizable static
TLS one day.  I think it's useful to document that GCC uses it.  (A
first version of this patch removed this function, but then I checked
GCC.)

>> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>>  	/* Arithmetic overflow.  */

>>  	return EINVAL;

>>        size += guardsize;

>> -      if (__builtin_expect (size < ((guardsize + __static_tls_size

>> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack

>>  				     + MINIMAL_REST_STACK + pagesize_m1)

>>  				    & ~pagesize_m1),

>>  			    0))

>

> Use __glibc_likely here.


I think we should eliminate all these hints from the thread creation
code.  It seems unlikely that there is a performance impact, and the
likely/unlikely hints are wrong for applications that consistently use
specific pthread_create features anyway.  But I'd prefer this to be a
separate patch.

> Do we still need the empty __pthread_initialize_minimal_internal

> function?


It is still called from the preinit code.

>> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h

>> index 2d15303dd9..5b49ce39c2 100644

>> --- a/sysdeps/nptl/pthread_early_init.h

>> +++ b/sysdeps/nptl/pthread_early_init.h

>> @@ -19,12 +19,40 @@

>>  #ifndef _PTHREAD_EARLY_INIT_H

>>  #define _PTHREAD_EARLY_INIT_H 1

>>  

>> +#include <nptl/nptl-stack.h>

>>  #include <nptl/pthreadP.h>

>>  #include <pthread_mutex_conf.h>

>> +#include <sys/resource.h>

>>  

>>  static inline void

>>  __pthread_early_init (void)

>>  {

>> +  /* Determine the default allowed stack size.  This is the size used

>> +     in case the user does not specify one.  */

>> +  struct rlimit limit;

>> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0

>> +      || limit.rlim_cur == RLIM_INFINITY)

>> +    /* The system limit is not usable.  Use an architecture-specific

>> +       default.  */

>> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;

>> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)

>> +    /* The system limit is unusably small.

>> +       Use the minimal size acceptable.  */

>> +    limit.rlim_cur = PTHREAD_STACK_MIN;

>

> Maybe we should move away from non-LFS inside glibc call and use 

> __getlimit64 instead here.


Yes, there's another call in nptl/pthread_getattr_np.c.

Thanks,
Florian
Adhemerval Zanella via Libc-alpha May 18, 2021, 5:33 p.m. | #3
On 17/05/2021 05:14, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:

> 

>>> -/* Get size and alignment requirements of the static TLS block.  */

>>> +/* Get size and alignment requirements of the static TLS block.  This

>>> +   function is no longer used by glibc itself, but the GCC sanitizers

>>> +   use it despite the GLIBC_PRIVATE status.  */

>>>  void

>>>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)

>>>  {

>>

>> Ok. I am not very found of adding this as de facto ABI, maybe we

>> proper export it outside GLIBC_PRIVATE since now binaries do rely

>> on them (since the sanitizer API project seems to be stalled).

> 

> The API is not future-proof because I want to implement resizable static

> TLS one day.  I think it's useful to document that GCC uses it.  (A

> first version of this patch removed this function, but then I checked

> GCC.)


Maybe we can fix it in gcc/sanitizer so that we are not bound to add
backward-compatibility symbols for GLIBC_PRIVATE.

> 

>>> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,

>>>  	/* Arithmetic overflow.  */

>>>  	return EINVAL;

>>>        size += guardsize;

>>> -      if (__builtin_expect (size < ((guardsize + __static_tls_size

>>> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack

>>>  				     + MINIMAL_REST_STACK + pagesize_m1)

>>>  				    & ~pagesize_m1),

>>>  			    0))

>>

>> Use __glibc_likely here.

> 

> I think we should eliminate all these hints from the thread creation

> code.  It seems unlikely that there is a performance impact, and the

> likely/unlikely hints are wrong for applications that consistently use

> specific pthread_create features anyway.  But I'd prefer this to be a

> separate patch.


Fair enough.  In fact, I think glibc overuses branch hints in a
lot of places, and I have the feeling that most of them do not improve much.


> 

>> Do we still need the empty __pthread_initialize_minimal_internal

>> function?

> 

> It is still called from the preinit code.


Ok.

> 

>>> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h

>>> index 2d15303dd9..5b49ce39c2 100644

>>> --- a/sysdeps/nptl/pthread_early_init.h

>>> +++ b/sysdeps/nptl/pthread_early_init.h

>>> @@ -19,12 +19,40 @@

>>>  #ifndef _PTHREAD_EARLY_INIT_H

>>>  #define _PTHREAD_EARLY_INIT_H 1

>>>  

>>> +#include <nptl/nptl-stack.h>

>>>  #include <nptl/pthreadP.h>

>>>  #include <pthread_mutex_conf.h>

>>> +#include <sys/resource.h>

>>>  

>>>  static inline void

>>>  __pthread_early_init (void)

>>>  {

>>> +  /* Determine the default allowed stack size.  This is the size used

>>> +     in case the user does not specify one.  */

>>> +  struct rlimit limit;

>>> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0

>>> +      || limit.rlim_cur == RLIM_INFINITY)

>>> +    /* The system limit is not usable.  Use an architecture-specific

>>> +       default.  */

>>> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;

>>> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)

>>> +    /* The system limit is unusably small.

>>> +       Use the minimal size acceptable.  */

>>> +    limit.rlim_cur = PTHREAD_STACK_MIN;

>>

>> Maybe we should move away from non-LFS inside glibc call and use 

>> __getlimit64 instead here.

> 

> Yes, there's another call in nptl/pthread_getattr_np.c.

> 

> Thanks,

> Florian

>

Patch

diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 91031c2b72..e531ec5913 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -386,8 +386,9 @@  allocate_dtv (void *result)
   return result;
 }
 
-
-/* Get size and alignment requirements of the static TLS block.  */
+/* Get size and alignment requirements of the static TLS block.  This
+   function is no longer used by glibc itself, but the GCC sanitizers
+   use it despite the GLIBC_PRIVATE status.  */
 void
 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
 {
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index c0a5c4d96d..dc81a2ca73 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -254,6 +254,8 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   struct pthread *pd;
   size_t size;
   size_t pagesize_m1 = __getpagesize () - 1;
+  size_t tls_static_size_for_stack = __nptl_tls_static_size_for_stack ();
+  size_t tls_static_align_m1 = GLRO (dl_tls_static_align) - 1;
 
   assert (powerof2 (pagesize_m1 + 1));
   assert (TCB_ALIGNMENT >= STACK_ALIGN);
@@ -284,17 +286,18 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* If the user also specified the size of the stack make sure it
 	 is large enough.  */
       if (attr->stacksize != 0
-	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
+	  && attr->stacksize < (tls_static_size_for_stack
+				+ MINIMAL_REST_STACK))
 	return EINVAL;
 
       /* Adjust stack size for alignment of the TLS block.  */
 #if TLS_TCB_AT_TP
       adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
-	    & __static_tls_align_m1;
+	    & tls_static_align_m1;
       assert (size > adj + TLS_TCB_SIZE);
 #elif TLS_DTV_AT_TP
-      adj = ((uintptr_t) stackaddr - __static_tls_size)
-	    & __static_tls_align_m1;
+      adj = ((uintptr_t) stackaddr - tls_static_size_for_stack)
+	    & tls_static_align_m1;
       assert (size > adj);
 #endif
 
@@ -307,7 +310,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 			       - TLS_TCB_SIZE - adj);
 #elif TLS_DTV_AT_TP
       pd = (struct pthread *) (((uintptr_t) stackaddr
-				- __static_tls_size - adj)
+				- tls_static_size_for_stack - adj)
 			       - TLS_PRE_TCB_SIZE);
 #endif
 
@@ -366,7 +369,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 			| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 
       /* Adjust the stack size for alignment.  */
-      size &= ~__static_tls_align_m1;
+      size &= ~tls_static_align_m1;
       assert (size != 0);
 
       /* Make sure the size of the stack is enough for the guard and
@@ -385,7 +388,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 	/* Arithmetic overflow.  */
 	return EINVAL;
       size += guardsize;
-      if (__builtin_expect (size < ((guardsize + __static_tls_size
+      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
 				     + MINIMAL_REST_STACK + pagesize_m1)
 				    & ~pagesize_m1),
 			    0))
@@ -414,11 +417,11 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #if TLS_TCB_AT_TP
 	  pd = (struct pthread *) ((((uintptr_t) mem + size)
 				    - TLS_TCB_SIZE)
-				   & ~__static_tls_align_m1);
+				   & ~tls_static_align_m1);
 #elif TLS_DTV_AT_TP
 	  pd = (struct pthread *) ((((uintptr_t) mem + size
-				    - __static_tls_size)
-				    & ~__static_tls_align_m1)
+				    - tls_static_size_for_stack)
+				    & ~tls_static_align_m1)
 				   - TLS_PRE_TCB_SIZE);
 #endif
 
@@ -602,7 +605,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 # if TLS_TCB_AT_TP
   /* The stack begins before the TCB and the static TLS block.  */
-  stacktop = ((char *) (pd + 1) - __static_tls_size);
+  stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);
 # elif TLS_DTV_AT_TP
   stacktop = (char *) (pd - 1);
 # endif
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index bc4831ac89..732e580355 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -36,10 +36,7 @@ 
 #include <kernel-features.h>
 #include <libc-pointer-arith.h>
 #include <pthread_mutex_conf.h>
-
-/* Size and alignment of static TLS block.  */
-size_t __static_tls_size;
-size_t __static_tls_align_m1;
+#include <nptl-stack.h>
 
 /* Version of the library, used in libthread_db to detect mismatches.  */
 static const char nptl_version[] __attribute_used__ = VERSION;
@@ -47,44 +44,6 @@  static const char nptl_version[] __attribute_used__ = VERSION;
 void
 __pthread_initialize_minimal_internal (void)
 {
-  /* Get the size of the static and alignment requirements for the TLS
-     block.  */
-  size_t static_tls_align;
-  _dl_get_tls_static_info (&__static_tls_size, &static_tls_align);
-
-  /* Make sure the size takes all the alignments into account.  */
-  if (STACK_ALIGN > static_tls_align)
-    static_tls_align = STACK_ALIGN;
-  __static_tls_align_m1 = static_tls_align - 1;
-
-  __static_tls_size = roundup (__static_tls_size, static_tls_align);
-
-  /* Determine the default allowed stack size.  This is the size used
-     in case the user does not specify one.  */
-  struct rlimit limit;
-  if (__getrlimit (RLIMIT_STACK, &limit) != 0
-      || limit.rlim_cur == RLIM_INFINITY)
-    /* The system limit is not usable.  Use an architecture-specific
-       default.  */
-    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
-  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
-    /* The system limit is unusably small.
-       Use the minimal size acceptable.  */
-    limit.rlim_cur = PTHREAD_STACK_MIN;
-
-  /* Make sure it meets the minimum size that allocate_stack
-     (allocatestack.c) will demand, which depends on the page size.  */
-  const uintptr_t pagesz = GLRO(dl_pagesize);
-  const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
-  if (limit.rlim_cur < minstack)
-    limit.rlim_cur = minstack;
-
-  /* Round the resource limit up to page size.  */
-  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
-  lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
-  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
-  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
-  lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 }
 strong_alias (__pthread_initialize_minimal_internal,
 	      __pthread_initialize_minimal)
@@ -101,5 +60,6 @@  strong_alias (__pthread_initialize_minimal_internal,
 size_t
 __pthread_get_minstack (const pthread_attr_t *attr)
 {
-  return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;
+  return (GLRO(dl_pagesize) + __nptl_tls_static_size_for_stack ()
+	  + PTHREAD_STACK_MIN);
 }
diff --git a/nptl/nptl-stack.h b/nptl/nptl-stack.h
index 8631b61816..a6bd8df77f 100644
--- a/nptl/nptl-stack.h
+++ b/nptl/nptl-stack.h
@@ -20,7 +20,8 @@ 
 #ifndef _NPTL_STACK_H
 #define _NPTL_STACK_H
 
-#include <descr.h>
+#include <nptl/descr.h>
+#include <ldsodefs.h>
 #include <list.h>
 #include <stdbool.h>
 
@@ -47,4 +48,12 @@  libc_hidden_proto (__nptl_deallocate_stack)
 /* Free stacks until cache size is lower than LIMIT.  */
 void __nptl_free_stacks (size_t limit) attribute_hidden;
 
+/* Compute the size of the static TLS area based on data from the
+   dynamic loader.  */
+static inline size_t
+__nptl_tls_static_size_for_stack (void)
+{
+  return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));
+}
+
 #endif /* _NPTL_STACK_H */
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index e33b071a4d..9ee61af8b3 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -205,10 +205,6 @@  libc_hidden_proto (__default_pthread_attr_lock)
 /* Called from __libc_freeres to deallocate the default attribute.  */
 extern void __default_pthread_attr_freeres (void) attribute_hidden;
 
-/* Size and alignment of static TLS block.  */
-extern size_t __static_tls_size attribute_hidden;
-extern size_t __static_tls_align_m1 attribute_hidden;
-
 /* Attribute handling.  */
 extern struct pthread_attr *__attr_list attribute_hidden;
 extern int __attr_list_lock attribute_hidden;
diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h
index 2d15303dd9..5b49ce39c2 100644
--- a/sysdeps/nptl/pthread_early_init.h
+++ b/sysdeps/nptl/pthread_early_init.h
@@ -19,12 +19,40 @@ 
 #ifndef _PTHREAD_EARLY_INIT_H
 #define _PTHREAD_EARLY_INIT_H 1
 
+#include <nptl/nptl-stack.h>
 #include <nptl/pthreadP.h>
 #include <pthread_mutex_conf.h>
+#include <sys/resource.h>
 
 static inline void
 __pthread_early_init (void)
 {
+  /* Determine the default allowed stack size.  This is the size used
+     in case the user does not specify one.  */
+  struct rlimit limit;
+  if (__getrlimit (RLIMIT_STACK, &limit) != 0
+      || limit.rlim_cur == RLIM_INFINITY)
+    /* The system limit is not usable.  Use an architecture-specific
+       default.  */
+    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
+  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
+    /* The system limit is unusably small.
+       Use the minimal size acceptable.  */
+    limit.rlim_cur = PTHREAD_STACK_MIN;
+
+  /* Make sure it meets the minimum size that allocate_stack
+     (allocatestack.c) will demand, which depends on the page size.  */
+  const uintptr_t pagesz = GLRO(dl_pagesize);
+  const size_t minstack = (pagesz + __nptl_tls_static_size_for_stack ()
+                           + MINIMAL_REST_STACK);
+  if (limit.rlim_cur < minstack)
+    limit.rlim_cur = minstack;
+
+  /* Round the resource limit up to page size.  */
+  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
+  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
+  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
+
 #if HAVE_TUNABLES
   __pthread_tunables_init ();
 #endif