Fix rwlock stall with PREFER_WRITER_NONRECURSIVE_NP (bug 23861)

Message ID mvmh8gripro.fsf@suse.de
State Superseded
Headers show
Series
  • Fix rwlock stall with PREFER_WRITER_NONRECURSIVE_NP (bug 23861)
Related show

Commit Message

Andreas Schwab Nov. 8, 2018, 2:54 p.m.
[BZ #23861]
	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full):
	Update expected value for __readers while waiting on
	PTHREAD_RWLOCK_RWAITING.
	* nptl/tst-rwlock-pwn.c: New file.
	* nptl/Makefile (tests): Add tst-rwlock-pwn.
---
 nptl/Makefile                |  3 +-
 nptl/pthread_rwlock_common.c |  6 +--
 nptl/tst-rwlock-pwn.c        | 78 ++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 nptl/tst-rwlock-pwn.c

-- 
2.19.1


-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Comments

Carlos O'Donell Nov. 8, 2018, 7:36 p.m. | #1
On 11/8/18 9:54 AM, Andreas Schwab wrote:
> 	[BZ #23861]

> 	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full):

> 	Update expected value for __readers while waiting on

> 	PTHREAD_RWLOCK_RWAITING.

> 	* nptl/tst-rwlock-pwn.c: New file.

> 	* nptl/Makefile (tests): Add tst-rwlock-pwn.


Is this at all related to this bug?

https://sourceware.org/bugzilla/show_bug.cgi?id=23844

I know this bug is not using PREFER_WRITER_NONRECURSIVE_NP, but I was curious
if you looked at this bug also when reviewing this code?

> ---

>  nptl/Makefile                |  3 +-

>  nptl/pthread_rwlock_common.c |  6 +--

>  nptl/tst-rwlock-pwn.c        | 78 ++++++++++++++++++++++++++++++++++++

>  3 files changed, 83 insertions(+), 4 deletions(-)

>  create mode 100644 nptl/tst-rwlock-pwn.c

> 

> diff --git a/nptl/Makefile b/nptl/Makefile

> index 49b6faa330..c164b929b7 100644

> --- a/nptl/Makefile

> +++ b/nptl/Makefile

> @@ -318,7 +318,8 @@ tests = tst-attr1 tst-attr2 tst-attr3 tst-default-attr \

>  	tst-minstack-throw \

>  	tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \

>  	tst-cnd-timedwait tst-thrd-detach tst-mtx-basic tst-thrd-sleep \

> -	tst-mtx-recursive tst-tss-basic tst-call-once tst-mtx-timedlock

> +	tst-mtx-recursive tst-tss-basic tst-call-once tst-mtx-timedlock \

> +	tst-rwlock-pwn


Please don't call this pwn :-)

Please use a more descriptive name like "tst-rwlock-stall"

>  

>  tests-internal := tst-rwlock19 tst-rwlock20 \

>  		  tst-sem11 tst-sem12 tst-sem13 \

> diff --git a/nptl/pthread_rwlock_common.c b/nptl/pthread_rwlock_common.c

> index a290d08332..e95cbe4033 100644

> --- a/nptl/pthread_rwlock_common.c

> +++ b/nptl/pthread_rwlock_common.c

> @@ -314,12 +314,12 @@ __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,

>  		 harmless because the flag is just about the state of

>  		 __readers, and all threads set the flag under the same

>  		 conditions.  */

> -	      while ((atomic_load_relaxed (&rwlock->__data.__readers)

> -		  & PTHREAD_RWLOCK_RWAITING) != 0)

> +	      while (((r = atomic_load_relaxed (&rwlock->__data.__readers))

> +		      & PTHREAD_RWLOCK_RWAITING) != 0)

>  		{

>  		  int private = __pthread_rwlock_get_private (rwlock);

>  		  int err = futex_abstimed_wait (&rwlock->__data.__readers,

> -		      r, abstime, private);

> +						 r, abstime, private);


Why is this change correct?

>  		  /* We ignore EAGAIN and EINTR.  On time-outs, we can just

>  		     return because we don't need to clean up anything.  */

>  		  if (err == ETIMEDOUT)

> diff --git a/nptl/tst-rwlock-pwn.c b/nptl/tst-rwlock-pwn.c

> new file mode 100644

> index 0000000000..a2be59576a

> --- /dev/null

> +++ b/nptl/tst-rwlock-pwn.c

> @@ -0,0 +1,78 @@

> +/* Test rwlock with PREFER_WRITER_NONRECURSIVE_NP.


OK.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   This file is part of the GNU C Library.

> +

> +   The GNU C Library is free software; you can redistribute it and/or

> +   modify it under the terms of the GNU Lesser General Public

> +   License as published by the Free Software Foundation; either

> +   version 2.1 of the License, or (at your option) any later version.

> +

> +   The GNU C Library is distributed in the hope that it will be useful,

> +   but WITHOUT ANY WARRANTY; without even the implied warranty of

> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +   Lesser General Public License for more details.

> +

> +   You should have received a copy of the GNU Lesser General Public

> +   License along with the GNU C Library; if not, see

> +   <http://www.gnu.org/licenses/>.  */

> +

> +#include <stdio.h>

> +#include <stdlib.h>

> +#include <unistd.h>

> +#include <pthread.h>

> +#include <support/xthread.h>

> +

> +#define LOOPS 10


Why 10?

> +#define NTHREADS 3


Why 3?

Suggest:

/* We choose 10 iterations and 3 threads because this happens
   to be able to trigger the stall today on modern hardware.  */

> +

> +volatile int do_exit;

> +pthread_rwlockattr_t mylock_attr;

> +pthread_rwlock_t mylock;

> +

> +void *

> +wrloop (void *a)


Misleading function name, it executes a wrlock or rdlock
not just 'writes in loop'.

Suggest 'wr_rd_loop' or just 'lock_loop'.

> +{

> +  while (!do_exit)

> +    {

> +      if (random () & 1)


Please make the test deterministic.

Does it still stall if you just increment a counter
and then alternate between the two operations?

> +	{

> +	  xpthread_rwlock_wrlock (&mylock);

> +	  xpthread_rwlock_unlock (&mylock);

> +	}

> +      else

> +	{

> +	  xpthread_rwlock_rdlock (&mylock);

> +	  xpthread_rwlock_unlock (&mylock);

> +	}

> +    }

> +  return NULL;

> +}

> +

> +int

> +do_test (void)

> +{

> +  xpthread_rwlockattr_init (&mylock_attr);

> +  xpthread_rwlockattr_setkind_np (&mylock_attr,

> +				  PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);

> +  xpthread_rwlock_init (&mylock, &mylock_attr);


OK.

> +

> +  for (int n = 0; n < LOOPS; n++)

> +    {

> +      pthread_t tids[NTHREADS];

> +      do_exit = 0;

> +      for (int i = 0; i < NTHREADS; i++)

> +	tids[i] = xpthread_create (NULL, wrloop, NULL);

> +      sleep (1);


Why sleep?

> +      printf ("Exiting..."); fflush (stdout);


Two lines please.

> +      do_exit = 1;

> +      for (int i = 0; i < NTHREADS; i++)

> +	xpthread_join (tids[i]);

> +      printf ("done.\n");

> +    }

> +  pthread_rwlock_destroy (&mylock);

> +  pthread_rwlockattr_destroy (&mylock_attr);

> +  return 0;

> +}

> +

> +#define TIMEOUT 3 * LOOPS


Please add a constant startup time.

Suggest:

#define TIMEOUT (DEFAULT_TIMEOUT + 3 * LOOPS)

> +#include <support/test-driver.c>

> 



-- 
Cheers,
Carlos.
Andreas Schwab Nov. 12, 2018, 9:08 a.m. | #2
On Nov 08 2018, Carlos O'Donell <carlos@redhat.com> wrote:

> On 11/8/18 9:54 AM, Andreas Schwab wrote:

>> 	[BZ #23861]

>> 	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full):

>> 	Update expected value for __readers while waiting on

>> 	PTHREAD_RWLOCK_RWAITING.

>> 	* nptl/tst-rwlock-pwn.c: New file.

>> 	* nptl/Makefile (tests): Add tst-rwlock-pwn.

>

> Is this at all related to this bug?

>

> https://sourceware.org/bugzilla/show_bug.cgi?id=23844


This part of the code is exclusively used for
PREFER_WRITER_NONRECURSIVE_NP rwlocks.

> Please don't call this pwn :-)


Why?

> Please use a more descriptive name like "tst-rwlock-stall"


It is testing PREFER_WRITER_NONRECURSIVE_NP, so pwn is very descriptive.

>>  		  int err = futex_abstimed_wait (&rwlock->__data.__readers,

>> -		      r, abstime, private);

>> +						 r, abstime, private);

>

> Why is this change correct?


Emacs told me so, and Emacs is always right.

>> +#define LOOPS 10

>

> Why 10?


Just to make the bug reproducable enough.

>

>> +#define NTHREADS 3

>

> Why 3?


From the original test case.

> Does it still stall if you just increment a counter

> and then alternate between the two operations?


No.

>> +  for (int n = 0; n < LOOPS; n++)

>> +    {

>> +      pthread_t tids[NTHREADS];

>> +      do_exit = 0;

>> +      for (int i = 0; i < NTHREADS; i++)

>> +	tids[i] = xpthread_create (NULL, wrloop, NULL);

>> +      sleep (1);

>

> Why sleep?


To let the threads running for some time.

Andreas.

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
Torvald Riegel Dec. 7, 2018, 1:56 p.m. | #3
On Thu, 2018-11-08 at 14:36 -0500, Carlos O'Donell wrote:
> On 11/8/18 9:54 AM, Andreas Schwab wrote:

> > 	[BZ #23861]

> > 	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full):

> > 	Update expected value for __readers while waiting on

> > 	PTHREAD_RWLOCK_RWAITING.

> > 	* nptl/tst-rwlock-pwn.c: New file.

> > 	* nptl/Makefile (tests): Add tst-rwlock-pwn.

> 

> Is this at all related to this bug?

> 

> https://sourceware.org/bugzilla/show_bug.cgi?id=23844


No that's a different bug.  At least the bug in the code is different, not
sure whether it could result in the same symptoms.

> >  

> >  tests-internal := tst-rwlock19 tst-rwlock20 \

> >  		  tst-sem11 tst-sem12 tst-sem13 \

> > diff --git a/nptl/pthread_rwlock_common.c b/nptl/pthread_rwlock_common.c

> > index a290d08332..e95cbe4033 100644

> > --- a/nptl/pthread_rwlock_common.c

> > +++ b/nptl/pthread_rwlock_common.c

> > @@ -314,12 +314,12 @@ __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,

> >  		 harmless because the flag is just about the state of

> >  		 __readers, and all threads set the flag under the same

> >  		 conditions.  */


I'd add this to the comment:
"Update r so that the futex call in the loop uses the correct value."

> > -	      while ((atomic_load_relaxed (&rwlock->__data.__readers)

> > -		  & PTHREAD_RWLOCK_RWAITING) != 0)

> > +	      while (((r = atomic_load_relaxed (&rwlock->__data.__readers))

> > +		      & PTHREAD_RWLOCK_RWAITING) != 0)

> >  		{

> >  		  int private = __pthread_rwlock_get_private (rwlock);

> >  		  int err = futex_abstimed_wait (&rwlock->__data.__readers,

> > -		      r, abstime, private);

> > +						 r, abstime, private);

> 

> Why is this change correct?


The fix is fine (and the formatting too, I guess).  I think the comment
above should explain the fix.  The futex call needs to wait on the based on
the value that lead to calling futex_wait in the first place.

Patch

diff --git a/nptl/Makefile b/nptl/Makefile
index 49b6faa330..c164b929b7 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -318,7 +318,8 @@  tests = tst-attr1 tst-attr2 tst-attr3 tst-default-attr \
 	tst-minstack-throw \
 	tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
 	tst-cnd-timedwait tst-thrd-detach tst-mtx-basic tst-thrd-sleep \
-	tst-mtx-recursive tst-tss-basic tst-call-once tst-mtx-timedlock
+	tst-mtx-recursive tst-tss-basic tst-call-once tst-mtx-timedlock \
+	tst-rwlock-pwn
 
 tests-internal := tst-rwlock19 tst-rwlock20 \
 		  tst-sem11 tst-sem12 tst-sem13 \
diff --git a/nptl/pthread_rwlock_common.c b/nptl/pthread_rwlock_common.c
index a290d08332..e95cbe4033 100644
--- a/nptl/pthread_rwlock_common.c
+++ b/nptl/pthread_rwlock_common.c
@@ -314,12 +314,12 @@  __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
 		 harmless because the flag is just about the state of
 		 __readers, and all threads set the flag under the same
 		 conditions.  */
-	      while ((atomic_load_relaxed (&rwlock->__data.__readers)
-		  & PTHREAD_RWLOCK_RWAITING) != 0)
+	      while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
+		      & PTHREAD_RWLOCK_RWAITING) != 0)
 		{
 		  int private = __pthread_rwlock_get_private (rwlock);
 		  int err = futex_abstimed_wait (&rwlock->__data.__readers,
-		      r, abstime, private);
+						 r, abstime, private);
 		  /* We ignore EAGAIN and EINTR.  On time-outs, we can just
 		     return because we don't need to clean up anything.  */
 		  if (err == ETIMEDOUT)
diff --git a/nptl/tst-rwlock-pwn.c b/nptl/tst-rwlock-pwn.c
new file mode 100644
index 0000000000..a2be59576a
--- /dev/null
+++ b/nptl/tst-rwlock-pwn.c
@@ -0,0 +1,78 @@ 
+/* Test rwlock with PREFER_WRITER_NONRECURSIVE_NP.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <support/xthread.h>
+
+#define LOOPS 10
+#define NTHREADS 3
+
+volatile int do_exit;
+pthread_rwlockattr_t mylock_attr;
+pthread_rwlock_t mylock;
+
+void *
+wrloop (void *a)
+{
+  while (!do_exit)
+    {
+      if (random () & 1)
+	{
+	  xpthread_rwlock_wrlock (&mylock);
+	  xpthread_rwlock_unlock (&mylock);
+	}
+      else
+	{
+	  xpthread_rwlock_rdlock (&mylock);
+	  xpthread_rwlock_unlock (&mylock);
+	}
+    }
+  return NULL;
+}
+
+int
+do_test (void)
+{
+  xpthread_rwlockattr_init (&mylock_attr);
+  xpthread_rwlockattr_setkind_np (&mylock_attr,
+				  PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+  xpthread_rwlock_init (&mylock, &mylock_attr);
+
+  for (int n = 0; n < LOOPS; n++)
+    {
+      pthread_t tids[NTHREADS];
+      do_exit = 0;
+      for (int i = 0; i < NTHREADS; i++)
+	tids[i] = xpthread_create (NULL, wrloop, NULL);
+      sleep (1);
+      printf ("Exiting..."); fflush (stdout);
+      do_exit = 1;
+      for (int i = 0; i < NTHREADS; i++)
+	xpthread_join (tids[i]);
+      printf ("done.\n");
+    }
+  pthread_rwlock_destroy (&mylock);
+  pthread_rwlockattr_destroy (&mylock_attr);
+  return 0;
+}
+
+#define TIMEOUT 3 * LOOPS
+#include <support/test-driver.c>