[13/13] Linux: Move __reclaim_stacks into the fork implementation in libc

Message ID 55e7367c90178cd8fe4cddb84ace7e31b6648f52.1620323953.git.fweimer@redhat.com
State New
Series
  • Linux: Move most stack management out of libpthread

Commit Message

Florian Weimer via Libc-alpha May 6, 2021, 6:11 p.m.
As a result, __libc_pthread_init is no longer needed.
---
 nptl/Versions        |   1 -
 nptl/allocatestack.c | 108 ------------------------------------------
 nptl/nptl-init.c     |   3 --
 nptl/pthreadP.h      |   7 ---
 sysdeps/nptl/fork.c  | 110 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 119 deletions(-)

-- 
2.30.2
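
The scenario this patch handles: after fork() in a multithreaded
process, only the calling thread exists in the child, and the stacks
of all other threads become dead memory that the child must recycle
into the thread stack cache. A minimal sketch of that scenario, using
only public POSIX interfaces (illustrative only; this is not part of
the patch or of the glibc test suite):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

/* Threads whose stacks become garbage in the fork child.  */
static void *
idle (void *arg)
{
  pause ();
  return NULL;
}

static void *
child_work (void *arg)
{
  /* With this series, the stack backing this thread can be one that
     the fork child recycled from the pre-fork threads.  */
  puts ("thread in fork child");
  return NULL;
}

int
main (void)
{
  pthread_t t[4];
  for (int i = 0; i < 4; ++i)
    if (pthread_create (&t[i], NULL, idle, NULL) != 0)
      exit (1);

  pid_t pid = fork ();
  if (pid < 0)
    exit (1);
  if (pid == 0)
    {
      /* Only the forking thread exists here; the other four stacks
         are unused and eligible for reuse via the stack cache.  */
      pthread_t u;
      if (pthread_create (&u, NULL, child_work, NULL) != 0)
        _exit (1);
      pthread_join (u, NULL);
      _exit (0);
    }
  waitpid (pid, NULL, 0);
  return 0;
}

Built with "gcc -pthread", the pthread_create in the child can then be
served from a recycled cached stack rather than a fresh mmap.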

Comments

Carlos O'Donell via Libc-alpha May 9, 2021, 9:41 p.m. | #1
On 5/6/21 2:11 PM, Florian Weimer via Libc-alpha wrote:
> As a result, __libc_pthread_init is no longer needed.


Yay! :-)

LGTM.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>

Reviewed-by: Carlos O'Donell <carlos@redhat.com>



> diff --git a/nptl/Versions b/nptl/Versions
> index d439a023b7..4c1c4ee0a7 100644
> --- a/nptl/Versions
> +++ b/nptl/Versions
> @@ -308,7 +308,6 @@ libc {
>      __libc_cleanup_push_defer;
>      __libc_dl_error_tsd;
>      __libc_multiple_threads;
> -    __libc_pthread_init;

OK.

>      __lll_clocklock_elision;
>      __lll_lock_elision;
>      __lll_lock_wait;

-- 
Cheers,
Carlos.

Patch

diff --git a/nptl/Versions b/nptl/Versions
index d439a023b7..4c1c4ee0a7 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -308,7 +308,6 @@ libc {
     __libc_cleanup_push_defer;
     __libc_dl_error_tsd;
     __libc_multiple_threads;
-    __libc_pthread_init;
     __lll_clocklock_elision;
     __lll_lock_elision;
     __lll_lock_wait;
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 076cffd35b..8672e89e75 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -754,111 +754,3 @@ __deallocate_stack (struct pthread *pd)
 
   lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 }
-
-/* In case of a fork() call the memory allocation in the child will be
-   the same but only one thread is running.  All stacks except that of
-   the one running thread are not used anymore.  We have to recycle
-   them.  */
-void
-__reclaim_stacks (void)
-{
-  struct pthread *self = (struct pthread *) THREAD_SELF;
-
-  /* No locking necessary.  The caller is the only stack in use.  But
-     we have to be aware that we might have interrupted a list
-     operation.  */
-
-  if (GL (dl_in_flight_stack) != 0)
-    {
-      bool add_p = GL (dl_in_flight_stack) & 1;
-      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
-
-      if (add_p)
-	{
-	  /* We always add at the beginning of the list.  So in this case we
-	     only need to check the beginning of these lists to see if the
-	     pointers at the head of the list are inconsistent.  */
-	  list_t *l = NULL;
-
-	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
-	    l = &GL (dl_stack_used);
-	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
-	    l = &GL (dl_stack_cache);
-
-	  if (l != NULL)
-	    {
-	      assert (l->next->prev == elem);
-	      elem->next = l->next;
-	      elem->prev = l;
-	      l->next = elem;
-	    }
-	}
-      else
-	{
-	  /* We can simply always replay the delete operation.  */
-	  elem->next->prev = elem->prev;
-	  elem->prev->next = elem->next;
-	}
-
-      GL (dl_in_flight_stack) = 0;
-    }
-
-  /* Mark all stacks except the still running one as free.  */
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      struct pthread *curp = list_entry (runp, struct pthread, list);
-      if (curp != self)
-	{
-	  /* This marks the stack as free.  */
-	  curp->tid = 0;
-
-	  /* Account for the size of the stack.  */
-	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
-
-	  if (curp->specific_used)
-	    {
-	      /* Clear the thread-specific data.  */
-	      memset (curp->specific_1stblock, '\0',
-		      sizeof (curp->specific_1stblock));
-
-	      curp->specific_used = false;
-
-	      for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
-		if (curp->specific[cnt] != NULL)
-		  {
-		    memset (curp->specific[cnt], '\0',
-			    sizeof (curp->specific_1stblock));
-
-		    /* We have allocated the block which we do not
-		       free here so re-set the bit.  */
-		    curp->specific_used = true;
-		  }
-	    }
-	}
-    }
-
-  /* Add the stack of all running threads to the cache.  */
-  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
-
-  /* Remove the entry for the current thread to from the cache list
-     and add it to the list of running threads.  Which of the two
-     lists is decided by the user_stack flag.  */
-  list_del (&self->list);
-
-  /* Re-initialize the lists for all the threads.  */
-  INIT_LIST_HEAD (&GL (dl_stack_used));
-  INIT_LIST_HEAD (&GL (dl_stack_user));
-
-  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
-    list_add (&self->list, &GL (dl_stack_user));
-  else
-    list_add (&self->list, &GL (dl_stack_used));
-
-  /* There is one thread running.  */
-  __nptl_nthreads = 1;
-
-  /* Initialize locks.  */
-  GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
-  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
-}
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index 4c89e7a792..16fb66bdf5 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -172,9 +172,6 @@ __pthread_initialize_minimal_internal (void)
   __default_pthread_attr.internal.stacksize = limit.rlim_cur;
   __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
   lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
-
-  /* Register the fork generation counter with the libc.  */
-  __libc_pthread_init (__reclaim_stacks);
 }
 strong_alias (__pthread_initialize_minimal_internal,
 	      __pthread_initialize_minimal)
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 6b912f053b..d9b97c814a 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -333,10 +333,6 @@ extern void __free_tcb (struct pthread *pd) attribute_hidden;
 /* Free allocated stack.  */
 extern void __deallocate_stack (struct pthread *pd) attribute_hidden;
 
-/* Mark all the stacks except for the current one as available.  This
-   function also re-initializes the lock for the stack cache.  */
-extern void __reclaim_stacks (void) attribute_hidden;
-
 /* Change the permissions of a thread stack.  Called from
    _dl_make_stacks_executable and pthread_create.  */
 int
@@ -372,9 +368,6 @@ extern unsigned long int __fork_generation attribute_hidden;
 /* Pointer to the fork generation counter in the thread library.  */
 extern unsigned long int *__fork_generation_pointer attribute_hidden;
 
-/* Register the generation counter in the libpthread with the libc.  */
-extern void __libc_pthread_init (void (*reclaim) (void));
-
 extern size_t __pthread_get_minstack (const pthread_attr_t *attr);
 
 /* Namespace save aliases.  */
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
index f41c40fca0..062b01265a 100644
--- a/sysdeps/nptl/fork.c
+++ b/sysdeps/nptl/fork.c
@@ -35,6 +35,7 @@ 
 #include <nss/nss_database.h>
 #include <unwind-link.h>
 #include <sys/single_threaded.h>
+#include <list.h>
 
 static void
 fresetlockfiles (void)
@@ -46,6 +47,106 @@ fresetlockfiles (void)
       _IO_lock_init (*((_IO_lock_t *) _IO_iter_file(i)->_lock));
 }
 
+/* In case of a fork() call the memory allocation in the child will be
+   the same but only one thread is running.  All stacks except that of
+   the one running thread are not used anymore.  We have to recycle
+   them.  */
+static void
+reclaim_stacks (void)
+{
+  struct pthread *self = (struct pthread *) THREAD_SELF;
+
+  /* No locking necessary.  The caller is the only stack in use.  But
+     we have to be aware that we might have interrupted a list
+     operation.  */
+
+  if (GL (dl_in_flight_stack) != 0)
+    {
+      bool add_p = GL (dl_in_flight_stack) & 1;
+      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
+
+      if (add_p)
+	{
+	  /* We always add at the beginning of the list.  So in this case we
+	     only need to check the beginning of these lists to see if the
+	     pointers at the head of the list are inconsistent.  */
+	  list_t *l = NULL;
+
+	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
+	    l = &GL (dl_stack_used);
+	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
+	    l = &GL (dl_stack_cache);
+
+	  if (l != NULL)
+	    {
+	      assert (l->next->prev == elem);
+	      elem->next = l->next;
+	      elem->prev = l;
+	      l->next = elem;
+	    }
+	}
+      else
+	{
+	  /* We can simply always replay the delete operation.  */
+	  elem->next->prev = elem->prev;
+	  elem->prev->next = elem->next;
+	}
+
+      GL (dl_in_flight_stack) = 0;
+    }
+
+  /* Mark all stacks except the still running one as free.  */
+  list_t *runp;
+  list_for_each (runp, &GL (dl_stack_used))
+    {
+      struct pthread *curp = list_entry (runp, struct pthread, list);
+      if (curp != self)
+	{
+	  /* This marks the stack as free.  */
+	  curp->tid = 0;
+
+	  /* Account for the size of the stack.  */
+	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
+
+	  if (curp->specific_used)
+	    {
+	      /* Clear the thread-specific data.  */
+	      memset (curp->specific_1stblock, '\0',
+		      sizeof (curp->specific_1stblock));
+
+	      curp->specific_used = false;
+
+	      for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
+		if (curp->specific[cnt] != NULL)
+		  {
+		    memset (curp->specific[cnt], '\0',
+			    sizeof (curp->specific_1stblock));
+
+		    /* We have allocated the block which we do not
+		       free here so re-set the bit.  */
+		    curp->specific_used = true;
+		  }
+	    }
+	}
+    }
+
+  /* Add the stack of all running threads to the cache.  */
+  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
+
+  /* Remove the entry for the current thread to from the cache list
+     and add it to the list of running threads.  Which of the two
+     lists is decided by the user_stack flag.  */
+  list_del (&self->list);
+
+  /* Re-initialize the lists for all the threads.  */
+  INIT_LIST_HEAD (&GL (dl_stack_used));
+  INIT_LIST_HEAD (&GL (dl_stack_user));
+
+  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
+    list_add (&self->list, &GL (dl_stack_user));
+  else
+    list_add (&self->list, &GL (dl_stack_used));
+}
 
 pid_t
 __libc_fork (void)
@@ -112,6 +213,13 @@ __libc_fork (void)
 	{
 	  __libc_unwind_link_after_fork ();
 
+	  /* There is one thread running.  */
+	  __nptl_nthreads = 1;
+
+	  /* Initialize thread library locks.  */
+	  GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
+	  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
+
 	  /* Release malloc locks.  */
 	  call_function_static_weak (__malloc_fork_unlock_child);
 
@@ -128,6 +236,8 @@ __libc_fork (void)
       /* Reset the lock the dynamic loader uses to protect its data.  */
       __rtld_lock_initialize (GL(dl_load_lock));
 
+      reclaim_stacks ();
+
       /* Run the handlers registered for the child.  */
       __run_fork_handlers (atfork_run_child, multiple_threads);
     }
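
The subtle part of reclaim_stacks is the dl_in_flight_stack replay.
Before the stack code links or unlinks a list element, it publishes a
pointer to that element with the low bit distinguishing an add from a
delete (hence the add_p test above), so a fork() that interrupts the
operation leaves enough state for the child to finish or replay it.
A standalone sketch of that tagged-pointer pattern (plain C with a
hand-rolled list; in_flight, list_add_head and replay are illustrative
names, not glibc internals):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal intrusive doubly-linked list, modeled on glibc's <list.h>.  */
typedef struct list_head
{
  struct list_head *next;
  struct list_head *prev;
} list_t;

#define LIST_INIT(name) { &(name), &(name) }

/* Tagged pointer to the element of an in-flight operation: low bit
   set for an add, clear for a delete, zero when nothing is in
   flight.  Stands in for GL (dl_in_flight_stack).  */
static uintptr_t in_flight;

/* Link ELEM at the head of HEAD, publishing intent first so that a
   fork() arriving between the stores leaves a recoverable state.  */
static void
list_add_head (list_t *elem, list_t *head)
{
  in_flight = (uintptr_t) elem | 1;
  elem->next = head->next;
  elem->prev = head;
  head->next->prev = elem;   /* a fork() here sees a half-linked ELEM */
  head->next = elem;
  in_flight = 0;
}

/* What the fork child does: finish or discard the interrupted
   operation, as reclaim_stacks does for the stack lists.  */
static void
replay (list_t *head)
{
  if (in_flight == 0)
    return;
  bool add_p = in_flight & 1;
  list_t *elem = (list_t *) (in_flight & ~(uintptr_t) 1);

  if (add_p)
    {
      /* Adds always go at the head, so an interrupted add shows up
         as an inconsistency at the head of the list.  */
      if (head->next->prev != head)
        {
          assert (head->next->prev == elem);
          elem->next = head->next;
          elem->prev = head;
          head->next = elem;
        }
    }
  else
    {
      /* A delete can simply be replayed unconditionally.  */
      elem->next->prev = elem->prev;
      elem->prev->next = elem->next;
    }
  in_flight = 0;
}

int
main (void)
{
  static list_t head = LIST_INIT (head);
  static list_t a, b;

  list_add_head (&a, &head);

  /* Simulate a fork() landing mid-add of B: intent is published and
     A's prev already points at B, but HEAD's next does not yet.  */
  in_flight = (uintptr_t) &b | 1;
  b.next = head.next;
  b.prev = &head;
  head.next->prev = &b;

  replay (&head);   /* finishes linking B: head -> b -> a */

  for (list_t *p = head.next; p != &head; p = p->next)
    puts (p == &b ? "b" : "a");
  return 0;
}

Replaying a delete is idempotent because the unlinked element still
points at its old neighbors, so rewriting their pointers is harmless
even when the delete had already completed before the fork.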