stdlib: Fix data race in __run_exit_handlers

Message ID 20210426192729.1745682-1-vitalybuka@google.com
State New
Headers show
Series
  • stdlib: Fix data race in __run_exit_handlers
Related show

Commit Message

Vitaly Buka via Libc-alpha April 26, 2021, 7:27 p.m.
Fixes https://sourceware.org/bugzilla/show_bug.cgi?id=27749

Hold __exit_funcs_lock almost all the time and unlock it only to execute
callbacks. This fixes two issues.

1. f->func.cxa was modified outside the lock with rare data race like:
	thread 0: __run_exit_handlers unlock __exit_funcs_lock
	thread 1: __internal_atexit locks __exit_funcs_lock
	thread 0: f->flavor = ef_free;
	thread 1: sees ef_free and uses it as new
	thread 1: new->func.cxa.fn = (void (*) (void *, int)) func;
	thread 1: new->func.cxa.arg = arg;
	thread 1: new->flavor = ef_cxa;
	thread 0: cxafct = f->func.cxa.fn;  // it's wrong fn!
	thread 0: cxafct (f->func.cxa.arg, status);  // it's wrong arg!
	thread 0: goto restart;
	thread 0: call the same exit_function again as it's ef_cxa

2. Don't unlock in main while loop after *listp = cur->next. If *listp
   is NULL and __exit_funcs_done is false another thread may fail in
   __new_exitfn on assert (l != NULL):
	 thread 0: *listp = cur->next;  // It can be the last: *listp = NULL.
	 thread 0: __libc_lock_unlock
	 thread 1: __libc_lock_lock in __on_exit
	 thread 1: __new_exitfn
	 thread 1: if (__exit_funcs_done)  // false: thread 0 isn't there yet.
	 thread 1: l = *listp
	 thread 1: moves on and crashes on assert (l != NULL);

The test needs multiple iterations to consistently fail without the fix.
---
 stdlib/Makefile                |   4 +-
 stdlib/exit.c                  |  28 ++++++---
 stdlib/test-cxa_atexit-race2.c | 110 +++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 11 deletions(-)
 create mode 100644 stdlib/test-cxa_atexit-race2.c

-- 
2.31.1.498.g6c1eba8ee3d-goog

Comments

Adhemerval Zanella via Libc-alpha May 13, 2021, 1:15 p.m. | #1
On 26/04/2021 16:27, Vitaly Buka via Libc-alpha wrote:
> Fixes https://sourceware.org/bugzilla/show_bug.cgi?id=27749

> 

> Keep __exit_funcs_lock almost all the time and unlock it only to execute

> callbacks. This fixed two issues.

> 

> 1. f->func.cxa was modified outside the lock with rare data race like:

> 	thread 0: __run_exit_handlers unlock __exit_funcs_lock

> 	thread 1: __internal_atexit locks __exit_funcs_lock

> 	thread 0: f->flavor = ef_free;

> 	thread 1: sees ef_free and use it as new

> 	thread 1: new->func.cxa.fn = (void (*) (void *, int)) func;

> 	thread 1: new->func.cxa.arg = arg;

> 	thread 1: new->flavor = ef_cxa;

> 	thread 0: cxafct = f->func.cxa.fn;  // it's wrong fn!

> 	thread 0: cxafct (f->func.cxa.arg, status);  // it's wrong arg!

> 	thread 0: goto restart;

> 	thread 0: call the same exit_function again as it's ef_cxa


Ok, the small window between fetching the function pointer and argument
from the list is triggering a race condition.

> 

> 2. Don't unlock in main while loop after *listp = cur->next. If *listp

>    is NULL and __exit_funcs_done is false another thread may fail in

>    __new_exitfn on assert (l != NULL):

> 	 thread 0: *listp = cur->next;  // It can be the last: *listp = NULL.

> 	 thread 0: __libc_lock_unlock

> 	 thread 1: __libc_lock_lock in __on_exit

> 	 thread 1: __new_exitfn

> 	 thread 1: if (__exit_funcs_done)  // false: thread 0 isn't there yet.

> 	 thread 1: l = *listp

> 	 thread 1: moves one and crashes on assert (l != NULL);


Yeah, this is tricky, but it does look correct.  I guess the lock/unlock
during the loop was added to give concurrent __cxa_atexit / on_exit
calls a chance to add a new callback, but it only complicates things,
as you noted.  We might try to fix it in __new_exitfn (to avoid the
assert), but I think the current approach of keeping the list locked and
only unlocking while running the callback is the right one.

The patch looks ok in general; I added some comments below.  I have
adjusted the patch based on my comments [1]; if you are ok with them,
I can push it upstream.

[1] https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/azanella/bz27749-atexit-fix

> 

> The test needs multiple iterations to consistently fail without the fix.

> ---

>  stdlib/Makefile                |   4 +-

>  stdlib/exit.c                  |  28 ++++++---

>  stdlib/test-cxa_atexit-race2.c | 110 +++++++++++++++++++++++++++++++++

>  3 files changed, 131 insertions(+), 11 deletions(-)

>  create mode 100644 stdlib/test-cxa_atexit-race2.c

> 

> diff --git a/stdlib/Makefile b/stdlib/Makefile

> index b3b30ab73e..f5755a1654 100644

> --- a/stdlib/Makefile

> +++ b/stdlib/Makefile

> @@ -81,7 +81,8 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \

>  		   tst-width-stdint tst-strfrom tst-strfrom-locale	    \

>  		   tst-getrandom tst-atexit tst-at_quick_exit 		    \

>  		   tst-cxa_atexit tst-on_exit test-atexit-race 		    \

> -		   test-at_quick_exit-race test-cxa_atexit-race             \

> +		   test-at_quick_exit-race test-cxa_atexit-race		    \

> +		   test-cxa_atexit-race2				    \

>  		   test-on_exit-race test-dlclose-exit-race 		    \

>  		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \

>  		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \

> @@ -100,6 +101,7 @@ endif

>  LDLIBS-test-atexit-race = $(shared-thread-library)

>  LDLIBS-test-at_quick_exit-race = $(shared-thread-library)

>  LDLIBS-test-cxa_atexit-race = $(shared-thread-library)

> +LDLIBS-test-cxa_atexit-race2 = $(shared-thread-library)

>  LDLIBS-test-on_exit-race = $(shared-thread-library)

>  LDLIBS-tst-canon-bz26341 = $(shared-thread-library)

>  

> diff --git a/stdlib/exit.c b/stdlib/exit.c

> index bed82733ad..f095b38ab3 100644

> --- a/stdlib/exit.c

> +++ b/stdlib/exit.c

> @@ -45,6 +45,8 @@ __run_exit_handlers (int status, struct exit_function_list **listp,

>      if (run_dtors)

>        __call_tls_dtors ();

>  

> +  __libc_lock_lock (__exit_funcs_lock);

> +

>    /* We do it this way to handle recursive calls to exit () made by

>       the functions registered with `atexit' and `on_exit'. We call

>       everyone on the list and use the status value in the last


Ok, it avoids the second race condition.

> @@ -53,8 +55,6 @@ __run_exit_handlers (int status, struct exit_function_list **listp,

>      {

>        struct exit_function_list *cur;

>  

> -      __libc_lock_lock (__exit_funcs_lock);

> -

>      restart:

>        cur = *listp;

>  


I think there is no need to use the goto anymore, since there is no need
to unlock the lock within the loop (the goto can be just a continue).

> @@ -63,7 +63,6 @@ __run_exit_handlers (int status, struct exit_function_list **listp,

>  	  /* Exit processing complete.  We will not allow any more

>  	     atexit/on_exit registrations.  */

>  	  __exit_funcs_done = true;

> -	  __libc_lock_unlock (__exit_funcs_lock);

>  	  break;

>  	}

>  


Ok, there is no need to unlock on break anymore.

> @@ -72,44 +71,52 @@ __run_exit_handlers (int status, struct exit_function_list **listp,

>  	  struct exit_function *const f = &cur->fns[--cur->idx];

>  	  const uint64_t new_exitfn_called = __new_exitfn_called;

>  

> -	  /* Unlock the list while we call a foreign function.  */

> -	  __libc_lock_unlock (__exit_funcs_lock);

>  	  switch (f->flavor)

>  	    {

>  	      void (*atfct) (void);

>  	      void (*onfct) (int status, void *arg);

>  	      void (*cxafct) (void *arg, int status);

> +	      void *arg;

>  

>  	    case ef_free:

>  	    case ef_us:

>  	      break;

>  	    case ef_on:

>  	      onfct = f->func.on.fn;

> +	      arg = f->func.on.arg;

>  #ifdef PTR_DEMANGLE

>  	      PTR_DEMANGLE (onfct);

>  #endif

> -	      onfct (status, f->func.on.arg);

> +	      /* Unlock the list while we call a foreign function.  */

> +	      __libc_lock_unlock (__exit_funcs_lock);

> +	      onfct (status, arg);

> +	      __libc_lock_lock (__exit_funcs_lock);

>  	      break;

>  	    case ef_at:

>  	      atfct = f->func.at;


Ok.

>  #ifdef PTR_DEMANGLE

>  	      PTR_DEMANGLE (atfct);

>  #endif

> +	      /* Unlock the list while we call a foreign function.  */

> +	      __libc_lock_unlock (__exit_funcs_lock);

>  	      atfct ();

> +	      __libc_lock_lock (__exit_funcs_lock);

>  	      break;


Ok.

>  	    case ef_cxa:

>  	      /* To avoid dlclose/exit race calling cxafct twice (BZ 22180),

>  		 we must mark this function as ef_free.  */

>  	      f->flavor = ef_free;

>  	      cxafct = f->func.cxa.fn;

> +	      arg = f->func.cxa.arg;

>  #ifdef PTR_DEMANGLE

>  	      PTR_DEMANGLE (cxafct);

>  #endif

> -	      cxafct (f->func.cxa.arg, status);

> +	      /* Unlock the list while we call a foreign function.  */

> +	      __libc_lock_unlock (__exit_funcs_lock);

> +	      cxafct (arg, status);

> +	      __libc_lock_lock (__exit_funcs_lock);

>  	      break;

>  	    }

> -	  /* Re-lock again before looking at global state.  */

> -	  __libc_lock_lock (__exit_funcs_lock);

>  

>  	  if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))

>  	    /* The last exit function, or another thread, has registered


Ok.

> @@ -123,9 +130,10 @@ __run_exit_handlers (int status, struct exit_function_list **listp,

>  	   allocate element.  */

>  	free (cur);

>  

> -      __libc_lock_unlock (__exit_funcs_lock);


Just remove the extra newline below as well.

>      }

>  

> +  __libc_lock_unlock (__exit_funcs_lock);

> +

>    if (run_list_atexit)

>      RUN_HOOK (__libc_atexit, ());

>  


Ok.

> diff --git a/stdlib/test-cxa_atexit-race2.c b/stdlib/test-cxa_atexit-race2.c

> new file mode 100644

> index 0000000000..d8c3d418e7

> --- /dev/null

> +++ b/stdlib/test-cxa_atexit-race2.c

> @@ -0,0 +1,110 @@

> +/* Support file for atexit/exit, etc. race tests.


I think it would be good to add a reference to the bug report.

> +   Copyright (C) 2017-2021 Free Software Foundation, Inc.

> +   This file is part of the GNU C Library.

> +

> +   The GNU C Library is free software; you can redistribute it and/or

> +   modify it under the terms of the GNU Lesser General Public

> +   License as published by the Free Software Foundation; either

> +   version 2.1 of the License, or (at your option) any later version.

> +

> +   The GNU C Library is distributed in the hope that it will be useful,

> +   but WITHOUT ANY WARRANTY; without even the implied warranty of

> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +   Lesser General Public License for more details.

> +

> +   You should have received a copy of the GNU Lesser General Public

> +   License along with the GNU C Library; if not, see

> +   <https://www.gnu.org/licenses/>.  */

> +

> +/* This file must be run from within a directory called "stdlib".  */


I don't think this is true.

> +

> +/* The atexit/exit, at_quick_exit/quick_exit, __cxa_atexit/exit, etc. exhibited

> +   data race while calling destructors.

> +

> +   This test registers destructors from the background thread, and checks that

> +   the same destructor is not called more than once.  */

> +

> +#include <stdatomic.h>

> +#include <stdio.h>

> +#include <stdlib.h>

> +#include <support/xthread.h>

> +#include <sys/wait.h>

> +#include <unistd.h>

> +

> +static atomic_int registered;

> +static atomic_int todo = 100000;

> +

> +static void

> +atexit_cb (void *arg)

> +{

> +  atomic_fetch_sub (&registered, 1);

> +  static void *prev;

> +  if (arg == prev)

> +    {

> +      printf ("%p\n", arg);

> +      abort ();


Use FAIL_EXIT1 here.

> +    }

> +  prev = arg;

> +

> +  while (atomic_load (&todo) > 0 && atomic_load (&registered) < 100)

> +    ;

> +}

> +

> +int __cxa_atexit (void (*func) (void *), void *arg, void *d);

> +

> +static void *

> +thread_func (void *arg)

> +{

> +  void *cb_arg = NULL;

> +  while (atomic_load (&todo) > 0)


Add an open bracket here.

> +    if (atomic_load (&registered) < 10000)

> +      {

> +        int n = 10;

> +        for (int i = 0; i < n; ++i)

> +          __cxa_atexit (&atexit_cb, ++cb_arg, 0);

> +        atomic_fetch_add (&registered, n);

> +        atomic_fetch_sub (&todo, n);

> +      }

> +  return 0;


Use NULL here.

> +}

> +

> +static void


I would add a _Noreturn here.

> +test_and_exit (void)

> +{

> +  pthread_attr_t attr;

> +

> +  xpthread_attr_init (&attr);

> +  xpthread_attr_setdetachstate (&attr, 1);

> +

> +  xpthread_create (&attr, thread_func, NULL);

> +  xpthread_attr_destroy (&attr);

> +  while (!atomic_load (&registered))


Compare against 0 explicitly here (unless the value is a bool, the
check should be explicit).

> +    ;

> +  exit (0);

> +}

> +

> +static int

> +do_test (void)

> +{

> +  for (int i = 0; i < 20; ++i)

> +    {

> +      for (int i = 0; i < 10; ++i)

> +        if (fork () == 0)


Use xfork.

> +          test_and_exit ();

> +

> +      int status;

> +      while (wait (&status) > 0)

> +        {

> +          if (!WIFEXITED (status))


I would prefer to limit the number of wait calls to check for invalid
return codes:

  for (int i = 0; i < 10; ++i)
    {
      int status;
      xwaitpid (0, &status, 0);
      if (!WIFEXITED (status))
        FAIL_EXIT1 ("Failed iterations %d", i);
      TEST_COMPARE (WEXITSTATUS (status), 0);
    }

> +            {

> +              printf ("Failed interation %d\n", i);

> +              abort ();


Use FAIL_EXIT1 here.

> +            }

> +        }

> +    }

> +

> +  exit (0);


There is no need to add an exit here.

> +}

> +

> +#define TEST_FUNCTION do_test

> +#include <support/test-driver.c>

>
Vitaly Buka via Libc-alpha May 14, 2021, 6:50 a.m. | #2
Thank you. These improvements look good to me. Please push it.

On Thu, 13 May 2021 at 06:15, Adhemerval Zanella <
adhemerval.zanella@linaro.org> wrote:

>

>

> On 26/04/2021 16:27, Vitaly Buka via Libc-alpha wrote:

> > Fixes https://sourceware.org/bugzilla/show_bug.cgi?id=27749

> >

> > Keep __exit_funcs_lock almost all the time and unlock it only to execute

> > callbacks. This fixed two issues.

> >

> > 1. f->func.cxa was modified outside the lock with rare data race like:

> >       thread 0: __run_exit_handlers unlock __exit_funcs_lock

> >       thread 1: __internal_atexit locks __exit_funcs_lock

> >       thread 0: f->flavor = ef_free;

> >       thread 1: sees ef_free and use it as new

> >       thread 1: new->func.cxa.fn = (void (*) (void *, int)) func;

> >       thread 1: new->func.cxa.arg = arg;

> >       thread 1: new->flavor = ef_cxa;

> >       thread 0: cxafct = f->func.cxa.fn;  // it's wrong fn!

> >       thread 0: cxafct (f->func.cxa.arg, status);  // it's wrong arg!

> >       thread 0: goto restart;

> >       thread 0: call the same exit_function again as it's ef_cxa

>

> Ok, the small window between fetching the function pointer and argument

> from the list is triggering a race condition.

>

> >

> > 2. Don't unlock in main while loop after *listp = cur->next. If *listp

> >    is NULL and __exit_funcs_done is false another thread may fail in

> >    __new_exitfn on assert (l != NULL):

> >        thread 0: *listp = cur->next;  // It can be the last: *listp =

> NULL.

> >        thread 0: __libc_lock_unlock

> >        thread 1: __libc_lock_lock in __on_exit

> >        thread 1: __new_exitfn

> >        thread 1: if (__exit_funcs_done)  // false: thread 0 isn't there

> yet.

> >        thread 1: l = *listp

> >        thread 1: moves one and crashes on assert (l != NULL);

>

> Yeah, this is tricky but it does look correct.  I guess the lock/unlock

> during the loop was added to give a chance to concurrent

> __cxa_atexit / on_exit to have a chance to add a new callback, but it

> also only complicates things as you noted.  We might try to fix it on the

> __new_exitfn (to avoid the assert), but I see the current approach of

> locking the list and only unlocking while running the callback is the

> right approach.

>

> The patch look ok in general, I added some comments below.  I have

> adjusted the patch based on my comments [1], if you are ok with them

> I can push it upstream.

>

> [1]

> https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/azanella/bz27749-atexit-fix

>

> >

> > The test needs multiple iterations to consistently fail without the fix.

> > ---

> >  stdlib/Makefile                |   4 +-

> >  stdlib/exit.c                  |  28 ++++++---

> >  stdlib/test-cxa_atexit-race2.c | 110 +++++++++++++++++++++++++++++++++

> >  3 files changed, 131 insertions(+), 11 deletions(-)

> >  create mode 100644 stdlib/test-cxa_atexit-race2.c

> >

> > diff --git a/stdlib/Makefile b/stdlib/Makefile

> > index b3b30ab73e..f5755a1654 100644

> > --- a/stdlib/Makefile

> > +++ b/stdlib/Makefile

> > @@ -81,7 +81,8 @@ tests               := tst-strtol tst-strtod testmb

> testrand testsort testdiv   \

> >                  tst-width-stdint tst-strfrom tst-strfrom-locale

>   \

> >                  tst-getrandom tst-atexit tst-at_quick_exit

>  \

> >                  tst-cxa_atexit tst-on_exit test-atexit-race

>   \

> > -                test-at_quick_exit-race test-cxa_atexit-race

>  \

> > +                test-at_quick_exit-race test-cxa_atexit-race

>  \

> > +                test-cxa_atexit-race2

>   \

> >                  test-on_exit-race test-dlclose-exit-race

>  \

> >                  tst-makecontext-align test-bz22786 tst-strtod-nan-sign \

> >                  tst-swapcontext1 tst-setcontext4 tst-setcontext5 \

> > @@ -100,6 +101,7 @@ endif

> >  LDLIBS-test-atexit-race = $(shared-thread-library)

> >  LDLIBS-test-at_quick_exit-race = $(shared-thread-library)

> >  LDLIBS-test-cxa_atexit-race = $(shared-thread-library)

> > +LDLIBS-test-cxa_atexit-race2 = $(shared-thread-library)

> >  LDLIBS-test-on_exit-race = $(shared-thread-library)

> >  LDLIBS-tst-canon-bz26341 = $(shared-thread-library)

> >

> > diff --git a/stdlib/exit.c b/stdlib/exit.c

> > index bed82733ad..f095b38ab3 100644

> > --- a/stdlib/exit.c

> > +++ b/stdlib/exit.c

> > @@ -45,6 +45,8 @@ __run_exit_handlers (int status, struct

> exit_function_list **listp,

> >      if (run_dtors)

> >        __call_tls_dtors ();

> >

> > +  __libc_lock_lock (__exit_funcs_lock);

> > +

> >    /* We do it this way to handle recursive calls to exit () made by

> >       the functions registered with `atexit' and `on_exit'. We call

> >       everyone on the list and use the status value in the last

>

> Ok, it avoids the second race condition.

>

> > @@ -53,8 +55,6 @@ __run_exit_handlers (int status, struct

> exit_function_list **listp,

> >      {

> >        struct exit_function_list *cur;

> >

> > -      __libc_lock_lock (__exit_funcs_lock);

> > -

> >      restart:

> >        cur = *listp;

> >

>

> I think there is no need use the goto anymore, since there is no need

> to unlock the lock within the loop (the goto can be just a continue).

>

> > @@ -63,7 +63,6 @@ __run_exit_handlers (int status, struct

> exit_function_list **listp,

> >         /* Exit processing complete.  We will not allow any more

> >            atexit/on_exit registrations.  */

> >         __exit_funcs_done = true;

> > -       __libc_lock_unlock (__exit_funcs_lock);

> >         break;

> >       }

> >

>

> Ok, there is no need to unlock on break anymore.

>

> > @@ -72,44 +71,52 @@ __run_exit_handlers (int status, struct

> exit_function_list **listp,

> >         struct exit_function *const f = &cur->fns[--cur->idx];

> >         const uint64_t new_exitfn_called = __new_exitfn_called;

> >

> > -       /* Unlock the list while we call a foreign function.  */

> > -       __libc_lock_unlock (__exit_funcs_lock);

> >         switch (f->flavor)

> >           {

> >             void (*atfct) (void);

> >             void (*onfct) (int status, void *arg);

> >             void (*cxafct) (void *arg, int status);

> > +           void *arg;

> >

> >           case ef_free:

> >           case ef_us:

> >             break;

> >           case ef_on:

> >             onfct = f->func.on.fn;

> > +           arg = f->func.on.arg;

> >  #ifdef PTR_DEMANGLE

> >             PTR_DEMANGLE (onfct);

> >  #endif

> > -           onfct (status, f->func.on.arg);

> > +           /* Unlock the list while we call a foreign function.  */

> > +           __libc_lock_unlock (__exit_funcs_lock);

> > +           onfct (status, arg);

> > +           __libc_lock_lock (__exit_funcs_lock);

> >             break;

> >           case ef_at:

> >             atfct = f->func.at;

>

> Ok.

>

> >  #ifdef PTR_DEMANGLE

> >             PTR_DEMANGLE (atfct);

> >  #endif

> > +           /* Unlock the list while we call a foreign function.  */

> > +           __libc_lock_unlock (__exit_funcs_lock);

> >             atfct ();

> > +           __libc_lock_lock (__exit_funcs_lock);

> >             break;

>

> Ok.

>

> >           case ef_cxa:

> >             /* To avoid dlclose/exit race calling cxafct twice (BZ

> 22180),

> >                we must mark this function as ef_free.  */

> >             f->flavor = ef_free;

> >             cxafct = f->func.cxa.fn;

> > +           arg = f->func.cxa.arg;

> >  #ifdef PTR_DEMANGLE

> >             PTR_DEMANGLE (cxafct);

> >  #endif

> > -           cxafct (f->func.cxa.arg, status);

> > +           /* Unlock the list while we call a foreign function.  */

> > +           __libc_lock_unlock (__exit_funcs_lock);

> > +           cxafct (arg, status);

> > +           __libc_lock_lock (__exit_funcs_lock);

> >             break;

> >           }

> > -       /* Re-lock again before looking at global state.  */

> > -       __libc_lock_lock (__exit_funcs_lock);

> >

> >         if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))

> >           /* The last exit function, or another thread, has registered

>

> Ok.

>

> > @@ -123,9 +130,10 @@ __run_exit_handlers (int status, struct

> exit_function_list **listp,

> >          allocate element.  */

> >       free (cur);

> >

> > -      __libc_lock_unlock (__exit_funcs_lock);

>

> Just remove the extra newline below as well.

>

> >      }

> >

> > +  __libc_lock_unlock (__exit_funcs_lock);

> > +

> >    if (run_list_atexit)

> >      RUN_HOOK (__libc_atexit, ());

> >

>

> Ok.

>

> > diff --git a/stdlib/test-cxa_atexit-race2.c

> b/stdlib/test-cxa_atexit-race2.c

> > new file mode 100644

> > index 0000000000..d8c3d418e7

> > --- /dev/null

> > +++ b/stdlib/test-cxa_atexit-race2.c

> > @@ -0,0 +1,110 @@

> > +/* Support file for atexit/exit, etc. race tests.

>

> I think it would be good to add a reference to the bug report.

>

> > +   Copyright (C) 2017-2021 Free Software Foundation, Inc.

> > +   This file is part of the GNU C Library.

> > +

> > +   The GNU C Library is free software; you can redistribute it and/or

> > +   modify it under the terms of the GNU Lesser General Public

> > +   License as published by the Free Software Foundation; either

> > +   version 2.1 of the License, or (at your option) any later version.

> > +

> > +   The GNU C Library is distributed in the hope that it will be useful,

> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of

> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > +   Lesser General Public License for more details.

> > +

> > +   You should have received a copy of the GNU Lesser General Public

> > +   License along with the GNU C Library; if not, see

> > +   <https://www.gnu.org/licenses/>.  */

> > +

> > +/* This file must be run from within a directory called "stdlib".  */

>

> I don't think this true.

>

> > +

> > +/* The atexit/exit, at_quick_exit/quick_exit, __cxa_atexit/exit, etc.

> exhibited

> > +   data race while calling destructors.

> > +

> > +   This test registers destructors from the background thread, and

> checks that

> > +   the same destructor is not called more than once.  */

> > +

> > +#include <stdatomic.h>

> > +#include <stdio.h>

> > +#include <stdlib.h>

> > +#include <support/xthread.h>

> > +#include <sys/wait.h>

> > +#include <unistd.h>

> > +

> > +static atomic_int registered;

> > +static atomic_int todo = 100000;

> > +

> > +static void

> > +atexit_cb (void *arg)

> > +{

> > +  atomic_fetch_sub (&registered, 1);

> > +  static void *prev;

> > +  if (arg == prev)

> > +    {

> > +      printf ("%p\n", arg);

> > +      abort ();

>

> Use FAIL_EXIT1 here.

>

> > +    }

> > +  prev = arg;

> > +

> > +  while (atomic_load (&todo) > 0 && atomic_load (&registered) < 100)

> > +    ;

> > +}

> > +

> > +int __cxa_atexit (void (*func) (void *), void *arg, void *d);

> > +

> > +static void *

> > +thread_func (void *arg)

> > +{

> > +  void *cb_arg = NULL;

> > +  while (atomic_load (&todo) > 0)

>

> Add a open bracket here.

>

> > +    if (atomic_load (&registered) < 10000)

> > +      {

> > +        int n = 10;

> > +        for (int i = 0; i < n; ++i)

> > +          __cxa_atexit (&atexit_cb, ++cb_arg, 0);

> > +        atomic_fetch_add (&registered, n);

> > +        atomic_fetch_sub (&todo, n);

> > +      }

> > +  return 0;

>

> Use NULL here.

>

> > +}

> > +

> > +static void

>

> I would add a _Noreturn here.

>

> > +test_and_exit (void)

> > +{

> > +  pthread_attr_t attr;

> > +

> > +  xpthread_attr_init (&attr);

> > +  xpthread_attr_setdetachstate (&attr, 1);

> > +

> > +  xpthread_create (&attr, thread_func, NULL);

> > +  xpthread_attr_destroy (&attr);

> > +  while (!atomic_load (&registered))

>

> Check for 0 here (unless the return value is a bool the type check

> should be explicit).

>

> > +    ;

> > +  exit (0);

> > +}

> > +

> > +static int

> > +do_test (void)

> > +{

> > +  for (int i = 0; i < 20; ++i)

> > +    {

> > +      for (int i = 0; i < 10; ++i)

> > +        if (fork () == 0)

>

> Use xfork.

>

> > +          test_and_exit ();

> > +

> > +      int status;

> > +      while (wait (&status) > 0)

> > +        {

> > +          if (!WIFEXITED (status))

>

> I prefer if we limit the number of wait call to check for invalid

> return codes:

>

>   for (int i = 0; i < 10; ++i)

>     {

>       int status;

>       xwaitpid (0, &status, 0);

>       if (!WIFEXITED (status))

>         FAIL_EXIT1 ("Failed iterations %d", i);

>       TEST_COMPARE (WEXITSTATUS (status), 0);

>     }

>

> > +            {

> > +              printf ("Failed interation %d\n", i);

> > +              abort ();

>

> Use FAIL_EXIT1 here.

>

> > +            }

> > +        }

> > +    }

> > +

> > +  exit (0);

>

> There is no need to add an exit here.

>

> > +}

> > +

> > +#define TEST_FUNCTION do_test

> > +#include <support/test-driver.c>

> >

>

Patch

diff --git a/stdlib/Makefile b/stdlib/Makefile
index b3b30ab73e..f5755a1654 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -81,7 +81,8 @@  tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-width-stdint tst-strfrom tst-strfrom-locale	    \
 		   tst-getrandom tst-atexit tst-at_quick_exit 		    \
 		   tst-cxa_atexit tst-on_exit test-atexit-race 		    \
-		   test-at_quick_exit-race test-cxa_atexit-race             \
+		   test-at_quick_exit-race test-cxa_atexit-race		    \
+		   test-cxa_atexit-race2				    \
 		   test-on_exit-race test-dlclose-exit-race 		    \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
@@ -100,6 +101,7 @@  endif
 LDLIBS-test-atexit-race = $(shared-thread-library)
 LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
 LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
+LDLIBS-test-cxa_atexit-race2 = $(shared-thread-library)
 LDLIBS-test-on_exit-race = $(shared-thread-library)
 LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
 
diff --git a/stdlib/exit.c b/stdlib/exit.c
index bed82733ad..f095b38ab3 100644
--- a/stdlib/exit.c
+++ b/stdlib/exit.c
@@ -45,6 +45,8 @@  __run_exit_handlers (int status, struct exit_function_list **listp,
     if (run_dtors)
       __call_tls_dtors ();
 
+  __libc_lock_lock (__exit_funcs_lock);
+
   /* We do it this way to handle recursive calls to exit () made by
      the functions registered with `atexit' and `on_exit'. We call
      everyone on the list and use the status value in the last
@@ -53,8 +55,6 @@  __run_exit_handlers (int status, struct exit_function_list **listp,
     {
       struct exit_function_list *cur;
 
-      __libc_lock_lock (__exit_funcs_lock);
-
     restart:
       cur = *listp;
 
@@ -63,7 +63,6 @@  __run_exit_handlers (int status, struct exit_function_list **listp,
 	  /* Exit processing complete.  We will not allow any more
 	     atexit/on_exit registrations.  */
 	  __exit_funcs_done = true;
-	  __libc_lock_unlock (__exit_funcs_lock);
 	  break;
 	}
 
@@ -72,44 +71,52 @@  __run_exit_handlers (int status, struct exit_function_list **listp,
 	  struct exit_function *const f = &cur->fns[--cur->idx];
 	  const uint64_t new_exitfn_called = __new_exitfn_called;
 
-	  /* Unlock the list while we call a foreign function.  */
-	  __libc_lock_unlock (__exit_funcs_lock);
 	  switch (f->flavor)
 	    {
 	      void (*atfct) (void);
 	      void (*onfct) (int status, void *arg);
 	      void (*cxafct) (void *arg, int status);
+	      void *arg;
 
 	    case ef_free:
 	    case ef_us:
 	      break;
 	    case ef_on:
 	      onfct = f->func.on.fn;
+	      arg = f->func.on.arg;
 #ifdef PTR_DEMANGLE
 	      PTR_DEMANGLE (onfct);
 #endif
-	      onfct (status, f->func.on.arg);
+	      /* Unlock the list while we call a foreign function.  */
+	      __libc_lock_unlock (__exit_funcs_lock);
+	      onfct (status, arg);
+	      __libc_lock_lock (__exit_funcs_lock);
 	      break;
 	    case ef_at:
 	      atfct = f->func.at;
 #ifdef PTR_DEMANGLE
 	      PTR_DEMANGLE (atfct);
 #endif
+	      /* Unlock the list while we call a foreign function.  */
+	      __libc_lock_unlock (__exit_funcs_lock);
 	      atfct ();
+	      __libc_lock_lock (__exit_funcs_lock);
 	      break;
 	    case ef_cxa:
 	      /* To avoid dlclose/exit race calling cxafct twice (BZ 22180),
 		 we must mark this function as ef_free.  */
 	      f->flavor = ef_free;
 	      cxafct = f->func.cxa.fn;
+	      arg = f->func.cxa.arg;
 #ifdef PTR_DEMANGLE
 	      PTR_DEMANGLE (cxafct);
 #endif
-	      cxafct (f->func.cxa.arg, status);
+	      /* Unlock the list while we call a foreign function.  */
+	      __libc_lock_unlock (__exit_funcs_lock);
+	      cxafct (arg, status);
+	      __libc_lock_lock (__exit_funcs_lock);
 	      break;
 	    }
-	  /* Re-lock again before looking at global state.  */
-	  __libc_lock_lock (__exit_funcs_lock);
 
 	  if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))
 	    /* The last exit function, or another thread, has registered
@@ -123,9 +130,10 @@  __run_exit_handlers (int status, struct exit_function_list **listp,
 	   allocate element.  */
 	free (cur);
 
-      __libc_lock_unlock (__exit_funcs_lock);
     }
 
+  __libc_lock_unlock (__exit_funcs_lock);
+
   if (run_list_atexit)
     RUN_HOOK (__libc_atexit, ());
 
diff --git a/stdlib/test-cxa_atexit-race2.c b/stdlib/test-cxa_atexit-race2.c
new file mode 100644
index 0000000000..d8c3d418e7
--- /dev/null
+++ b/stdlib/test-cxa_atexit-race2.c
@@ -0,0 +1,110 @@ 
+/* Support file for atexit/exit, etc. race tests.
+   Copyright (C) 2017-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This file must be run from within a directory called "stdlib".  */
+
+/* The atexit/exit, at_quick_exit/quick_exit, __cxa_atexit/exit, etc. exhibited
+   data race while calling destructors.
+
+   This test registers destructors from the background thread, and checks that
+   the same destructor is not called more than once.  */
+
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/xthread.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static atomic_int registered;
+static atomic_int todo = 100000;
+
+static void
+atexit_cb (void *arg)
+{
+  atomic_fetch_sub (&registered, 1);
+  static void *prev;
+  if (arg == prev)
+    {
+      printf ("%p\n", arg);
+      abort ();
+    }
+  prev = arg;
+
+  while (atomic_load (&todo) > 0 && atomic_load (&registered) < 100)
+    ;
+}
+
+int __cxa_atexit (void (*func) (void *), void *arg, void *d);
+
+static void *
+thread_func (void *arg)
+{
+  void *cb_arg = NULL;
+  while (atomic_load (&todo) > 0)
+    if (atomic_load (&registered) < 10000)
+      {
+        int n = 10;
+        for (int i = 0; i < n; ++i)
+          __cxa_atexit (&atexit_cb, ++cb_arg, 0);
+        atomic_fetch_add (&registered, n);
+        atomic_fetch_sub (&todo, n);
+      }
+  return 0;
+}
+
+static void
+test_and_exit (void)
+{
+  pthread_attr_t attr;
+
+  xpthread_attr_init (&attr);
+  xpthread_attr_setdetachstate (&attr, 1);
+
+  xpthread_create (&attr, thread_func, NULL);
+  xpthread_attr_destroy (&attr);
+  while (!atomic_load (&registered))
+    ;
+  exit (0);
+}
+
+static int
+do_test (void)
+{
+  for (int i = 0; i < 20; ++i)
+    {
+      for (int i = 0; i < 10; ++i)
+        if (fork () == 0)
+          test_and_exit ();
+
+      int status;
+      while (wait (&status) > 0)
+        {
+          if (!WIFEXITED (status))
+            {
+              printf ("Failed interation %d\n", i);
+              abort ();
+            }
+        }
+    }
+
+  exit (0);
+}
+
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>