[v2,02/13] elf, nptl: Resolve recursive lock implementation early

Message ID 32df0df2b98f923c651203bd20e3df950a889d87.1620323953.git.fweimer@redhat.com
State New
Headers show
Series
  • Linux: Move most stack management out of libpthread
Related show

Commit Message

Florian Weimer via Libc-alpha May 6, 2021, 6:09 p.m.
If libpthread is included in libc, it is not necessary to delay
initialization of the lock/unlock function pointers until libpthread
is loaded.  This eliminates two unprotected function pointers
from _rtld_global and removes some initialization code from
libpthread.
---
v2: Rename dl-lock.c into dl-mutex.c and use a sysdeps override instead
    of a preprocessor conditional.

 elf/Makefile               |  3 ++-
 elf/dl-mutex.c             | 19 ++++++++++++++
 elf/rtld.c                 | 18 +++++++++++++
 nptl/nptl-init.c           |  9 -------
 sysdeps/generic/ldsodefs.h | 25 +++++++++++++++++-
 sysdeps/nptl/dl-mutex.c    | 53 ++++++++++++++++++++++++++++++++++++++
 sysdeps/nptl/libc-lockP.h  | 17 +++---------
 7 files changed, 120 insertions(+), 24 deletions(-)
 create mode 100644 elf/dl-mutex.c
 create mode 100644 sysdeps/nptl/dl-mutex.c

-- 
2.30.2

Comments

Carlos O'Donell via Libc-alpha May 9, 2021, 9:42 p.m. | #1
On 5/6/21 2:09 PM, Florian Weimer via Libc-alpha wrote:
> If libpthread is included in libc, it is not necessary to delay

> initialization of the lock/unlock function pointers until libpthread

> is loaded.  This eliminates two unprotected function pointers

> from _rtld_global and removes some initialization code from

> libpthread.


This version looks good to me, and the early initialization makes it
logically easier to follow when reading the code. Despite the removal
of the unprotected function pointers in _rtld_global, we still need
some function pointer in order to look up the function symbols from libc.so
and remember their values, but data placement is harder to discover than
the fixed offset from a public symbol.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>

Reviewed-by: Carlos O'Donell <carlos@redhat.com>



> ---

> v2: Rename dl-lock.c into dl-mutex.c and use a sysdeps override instead

>     of a preprocessor conditional.

> 

>  elf/Makefile               |  3 ++-

>  elf/dl-mutex.c             | 19 ++++++++++++++

>  elf/rtld.c                 | 18 +++++++++++++

>  nptl/nptl-init.c           |  9 -------

>  sysdeps/generic/ldsodefs.h | 25 +++++++++++++++++-

>  sysdeps/nptl/dl-mutex.c    | 53 ++++++++++++++++++++++++++++++++++++++

>  sysdeps/nptl/libc-lockP.h  | 17 +++---------

>  7 files changed, 120 insertions(+), 24 deletions(-)

>  create mode 100644 elf/dl-mutex.c

>  create mode 100644 sysdeps/nptl/dl-mutex.c

> 

> diff --git a/elf/Makefile b/elf/Makefile

> index 4f99af626f..d3e909637a 100644

> --- a/elf/Makefile

> +++ b/elf/Makefile

> @@ -66,7 +66,8 @@ elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \

>  # interpreter and operating independent of libc.

>  rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \

>    dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \

> -  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu

> +  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \

> +  dl-mutex

>  all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)

>  

>  CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables

> diff --git a/elf/dl-mutex.c b/elf/dl-mutex.c

> new file mode 100644

> index 0000000000..2cd9d49c2e

> --- /dev/null

> +++ b/elf/dl-mutex.c

> @@ -0,0 +1,19 @@

> +/* Recursive locking implementation for the dynamic loader.  Generic version.

> +   Copyright (C) 2021 Free Software Foundation, Inc.

> +   This file is part of the GNU C Library.

> +

> +   The GNU C Library is free software; you can redistribute it and/or

> +   modify it under the terms of the GNU Lesser General Public

> +   License as published by the Free Software Foundation; either

> +   version 2.1 of the License, or (at your option) any later version.

> +

> +   The GNU C Library is distributed in the hope that it will be useful,

> +   but WITHOUT ANY WARRANTY; without even the implied warranty of

> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +   Lesser General Public License for more details.

> +

> +   You should have received a copy of the GNU Lesser General Public

> +   License along with the GNU C Library; if not, see

> +   <https://www.gnu.org/licenses/>.  */

> +

> +/* The generic version initialization happpens in dl_main.  */

> diff --git a/elf/rtld.c b/elf/rtld.c

> index ad325d4c10..a359167f8a 100644

> --- a/elf/rtld.c

> +++ b/elf/rtld.c

> @@ -857,6 +857,14 @@ rtld_lock_default_unlock_recursive (void *lock)

>    __rtld_lock_default_unlock_recursive (lock);

>  }

>  #endif

> +#if PTHREAD_IN_LIBC

> +/* Dummy implementation.  See __rtld_mutex_init.  */

> +static int

> +rtld_mutex_dummy (pthread_mutex_t *lock)

> +{

> +  return 0;

> +}

> +#endif

>  

>  

>  static void

> @@ -1148,6 +1156,10 @@ dl_main (const ElfW(Phdr) *phdr,

>    GL(dl_rtld_lock_recursive) = rtld_lock_default_lock_recursive;

>    GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive;

>  #endif

> +#if PTHREAD_IN_LIBC

> +  ___rtld_mutex_lock = rtld_mutex_dummy;

> +  ___rtld_mutex_unlock = rtld_mutex_dummy;

> +#endif

>  

>    /* The explicit initialization here is cheaper than processing the reloc

>       in the _rtld_local definition's initializer.  */

> @@ -2363,6 +2375,9 @@ dl_main (const ElfW(Phdr) *phdr,

>  	 loader.  */

>        __rtld_malloc_init_real (main_map);

>  

> +      /* Likewise for the locking implementation.  */

> +      __rtld_mutex_init ();

> +

>        /* Mark all the objects so we know they have been already relocated.  */

>        for (struct link_map *l = main_map; l != NULL; l = l->l_next)

>  	{

> @@ -2468,6 +2483,9 @@ dl_main (const ElfW(Phdr) *phdr,

>  	 at this point.  */

>        __rtld_malloc_init_real (main_map);

>  

> +      /* Likewise for the locking implementation.  */

> +      __rtld_mutex_init ();

> +

>        RTLD_TIMING_VAR (start);

>        rtld_timer_start (&start);

>  

> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c

> index fcab5a0904..2724770533 100644

> --- a/nptl/nptl-init.c

> +++ b/nptl/nptl-init.c

> @@ -179,15 +179,6 @@ __pthread_initialize_minimal_internal (void)

>    lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);

>  

>  #ifdef SHARED

> -  /* Make __rtld_lock_{,un}lock_recursive use pthread_mutex_{,un}lock,

> -     keep the lock count from the ld.so implementation.  */

> -  GL(dl_rtld_lock_recursive) = (void *) __pthread_mutex_lock;

> -  GL(dl_rtld_unlock_recursive) = (void *) __pthread_mutex_unlock;

> -  unsigned int rtld_lock_count = GL(dl_load_lock).mutex.__data.__count;

> -  GL(dl_load_lock).mutex.__data.__count = 0;

> -  while (rtld_lock_count-- > 0)

> -    __pthread_mutex_lock (&GL(dl_load_lock).mutex);

> -

>    GL(dl_make_stack_executable_hook) = &__make_stacks_executable;

>  #endif

>  

> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h

> index 1b064c5894..6d590d1335 100644

> --- a/sysdeps/generic/ldsodefs.h

> +++ b/sysdeps/generic/ldsodefs.h

> @@ -403,7 +403,7 @@ struct rtld_global

>    struct auditstate _dl_rtld_auditstate[DL_NNS];

>  #endif

>  

> -#if defined SHARED && defined _LIBC_REENTRANT \

> +#if !PTHREAD_IN_LIBC && defined SHARED \

>      && defined __rtld_lock_default_lock_recursive

>    EXTERN void (*_dl_rtld_lock_recursive) (void *);

>    EXTERN void (*_dl_rtld_unlock_recursive) (void *);

> @@ -1318,6 +1318,29 @@ link_map_audit_state (struct link_map *l, size_t index)

>  }

>  #endif /* SHARED */

>  

> +#if PTHREAD_IN_LIBC && defined SHARED

> +/* Recursive locking implementation for use within the dynamic loader.

> +   Used to define the __rtld_lock_lock_recursive and

> +   __rtld_lock_unlock_recursive via <libc-lock.h>.  Initialized to a

> +   no-op dummy implementation early.  Similar

> +   to GL (dl_rtld_lock_recursive) and GL (dl_rtld_unlock_recursive)

> +   in !PTHREAD_IN_LIBC builds.  */

> +extern int (*___rtld_mutex_lock) (pthread_mutex_t *) attribute_hidden;

> +extern int (*___rtld_mutex_unlock) (pthread_mutex_t *lock) attribute_hidden;

> +

> +/* Called after libc has been loaded, but before RELRO is activated.

> +   Used to initialize the function pointers to the actual

> +   implementations.  */

> +void __rtld_mutex_init (void) attribute_hidden;

> +#else /* !PTHREAD_IN_LIBC */

> +static inline void

> +__rtld_mutex_init (void)

> +{

> +  /* The initialization happens later (!PTHREAD_IN_LIBC) or is not

> +     needed at all (!SHARED).  */

> +}

> +#endif /* !PTHREAD_IN_LIBC */

> +

>  #if THREAD_GSCOPE_IN_TCB

>  void __thread_gscope_wait (void) attribute_hidden;

>  # define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()

> diff --git a/sysdeps/nptl/dl-mutex.c b/sysdeps/nptl/dl-mutex.c

> new file mode 100644

> index 0000000000..08b71dc21b

> --- /dev/null

> +++ b/sysdeps/nptl/dl-mutex.c

> @@ -0,0 +1,53 @@

> +/* Recursive locking implementation for the dynamic loader.  NPTL version.

> +   Copyright (C) 2021 Free Software Foundation, Inc.

> +   This file is part of the GNU C Library.

> +

> +   The GNU C Library is free software; you can redistribute it and/or

> +   modify it under the terms of the GNU Lesser General Public

> +   License as published by the Free Software Foundation; either

> +   version 2.1 of the License, or (at your option) any later version.

> +

> +   The GNU C Library is distributed in the hope that it will be useful,

> +   but WITHOUT ANY WARRANTY; without even the implied warranty of

> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +   Lesser General Public License for more details.

> +

> +   You should have received a copy of the GNU Lesser General Public

> +   License along with the GNU C Library; if not, see

> +   <https://www.gnu.org/licenses/>.  */

> +

> +/* Use the mutex implementation in libc (assuming PTHREAD_IN_LIBC).  */

> +

> +#include <assert.h>

> +#include <first-versions.h>

> +#include <ldsodefs.h>

> +

> +__typeof (pthread_mutex_lock) *___rtld_mutex_lock attribute_relro;

> +__typeof (pthread_mutex_unlock) *___rtld_mutex_unlock attribute_relro;

> +

> +void

> +__rtld_mutex_init (void)

> +{

> +  /* There is an implicit assumption here that the lock counters are

> +     zero and this function is called while nothing is locked.  For

> +     early initialization of the mutex functions this is true because

> +     it happens directly in dl_main in elf/rtld.c, and not some ELF

> +     constructor while holding loader locks.  */

> +

> +  struct link_map *libc_map = GL (dl_ns)[LM_ID_BASE].libc_map;

> +

> +  const ElfW(Sym) *sym

> +    = _dl_lookup_direct (libc_map, "pthread_mutex_lock",

> +                         0x4f152227, /* dl_new_hash output.  */

> +                         FIRST_VERSION_libc_pthread_mutex_lock_STRING,

> +                         FIRST_VERSION_libc_pthread_mutex_lock_HASH);

> +  assert (sym != NULL);

> +  ___rtld_mutex_lock = DL_SYMBOL_ADDRESS (libc_map, sym);

> +

> +  sym = _dl_lookup_direct (libc_map, "pthread_mutex_unlock",

> +                           0x7dd7aaaa, /* dl_new_hash output.  */

> +                           FIRST_VERSION_libc_pthread_mutex_unlock_STRING,

> +                           FIRST_VERSION_libc_pthread_mutex_unlock_HASH);

> +  assert (sym != NULL);

> +  ___rtld_mutex_unlock = DL_SYMBOL_ADDRESS (libc_map, sym);

> +}

> diff --git a/sysdeps/nptl/libc-lockP.h b/sysdeps/nptl/libc-lockP.h

> index ae9691d40e..ec7b02bbdd 100644

> --- a/sysdeps/nptl/libc-lockP.h

> +++ b/sysdeps/nptl/libc-lockP.h

> @@ -151,9 +151,6 @@ _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");

>    __libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)

>  #endif

>  

> -#define __rtld_lock_trylock_recursive(NAME) \

> -  __libc_maybe_call (__pthread_mutex_trylock, (&(NAME).mutex), 0)

> -

>  /* Unlock the named lock variable.  */

>  #if IS_IN (libc) || IS_IN (libpthread)

>  # define __libc_lock_unlock(NAME) \

> @@ -163,19 +160,13 @@ _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");

>  #endif

>  #define __libc_rwlock_unlock(NAME) __pthread_rwlock_unlock (&(NAME))

>  

> -#ifdef SHARED

> -# define __rtld_lock_default_lock_recursive(lock) \

> -  ++((pthread_mutex_t *)(lock))->__data.__count;

> -

> -# define __rtld_lock_default_unlock_recursive(lock) \

> -  --((pthread_mutex_t *)(lock))->__data.__count;

> -

> +#if IS_IN (rtld)

>  # define __rtld_lock_lock_recursive(NAME) \

> -  GL(dl_rtld_lock_recursive) (&(NAME).mutex)

> +  ___rtld_mutex_lock (&(NAME).mutex)

>  

>  # define __rtld_lock_unlock_recursive(NAME) \

> -  GL(dl_rtld_unlock_recursive) (&(NAME).mutex)

> -#else

> +  ___rtld_mutex_unlock (&(NAME).mutex)

> +#else /* Not in the dynamic loader.  */

>  # define __rtld_lock_lock_recursive(NAME) \

>    __pthread_mutex_lock (&(NAME).mutex)

>  

> 



-- 
Cheers,
Carlos.
Florian Weimer via Libc-alpha May 10, 2021, 5:54 a.m. | #2
* Carlos O'Donell:

> On 5/6/21 2:09 PM, Florian Weimer via Libc-alpha wrote:

>> If libpthread is included in libc, it is not necessary to delay

>> initialization of the lock/unlock function pointers until libpthread

>> is loaded.  This eliminates two unprotected function pointers

>> from _rtld_global and removes some initialization code from

>> libpthread.

>

> This version looks good to me, and the early initialization makes it

> logically easier to follow when reading the code. Despite the removal

> of the unprotected function pointesr in _rtld_global, we still need

> some function pointer in order to lookup the function symbols from libc.so

> and remember their values, but data placement is harder to discover than

> the fixed offset from a public symbol.


Eh, the lookup happens before any user code runs, so it really ought to
be safe. 8-)

Thanks,
Florian

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 4f99af626f..d3e909637a 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -66,7 +66,8 @@  elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
 # interpreter and operating independent of libc.
 rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
   dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
-  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu
+  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \
+  dl-mutex
 all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
 
 CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
diff --git a/elf/dl-mutex.c b/elf/dl-mutex.c
new file mode 100644
index 0000000000..2cd9d49c2e
--- /dev/null
+++ b/elf/dl-mutex.c
@@ -0,0 +1,19 @@ 
+/* Recursive locking implementation for the dynamic loader.  Generic version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* The generic version initialization happens in dl_main.  */
diff --git a/elf/rtld.c b/elf/rtld.c
index ad325d4c10..a359167f8a 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -857,6 +857,14 @@  rtld_lock_default_unlock_recursive (void *lock)
   __rtld_lock_default_unlock_recursive (lock);
 }
 #endif
+#if PTHREAD_IN_LIBC
+/* Dummy implementation.  See __rtld_mutex_init.  */
+static int
+rtld_mutex_dummy (pthread_mutex_t *lock)
+{
+  return 0;
+}
+#endif
 
 
 static void
@@ -1148,6 +1156,10 @@  dl_main (const ElfW(Phdr) *phdr,
   GL(dl_rtld_lock_recursive) = rtld_lock_default_lock_recursive;
   GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive;
 #endif
+#if PTHREAD_IN_LIBC
+  ___rtld_mutex_lock = rtld_mutex_dummy;
+  ___rtld_mutex_unlock = rtld_mutex_dummy;
+#endif
 
   /* The explicit initialization here is cheaper than processing the reloc
      in the _rtld_local definition's initializer.  */
@@ -2363,6 +2375,9 @@  dl_main (const ElfW(Phdr) *phdr,
 	 loader.  */
       __rtld_malloc_init_real (main_map);
 
+      /* Likewise for the locking implementation.  */
+      __rtld_mutex_init ();
+
       /* Mark all the objects so we know they have been already relocated.  */
       for (struct link_map *l = main_map; l != NULL; l = l->l_next)
 	{
@@ -2468,6 +2483,9 @@  dl_main (const ElfW(Phdr) *phdr,
 	 at this point.  */
       __rtld_malloc_init_real (main_map);
 
+      /* Likewise for the locking implementation.  */
+      __rtld_mutex_init ();
+
       RTLD_TIMING_VAR (start);
       rtld_timer_start (&start);
 
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index fcab5a0904..2724770533 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -179,15 +179,6 @@  __pthread_initialize_minimal_internal (void)
   lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 
 #ifdef SHARED
-  /* Make __rtld_lock_{,un}lock_recursive use pthread_mutex_{,un}lock,
-     keep the lock count from the ld.so implementation.  */
-  GL(dl_rtld_lock_recursive) = (void *) __pthread_mutex_lock;
-  GL(dl_rtld_unlock_recursive) = (void *) __pthread_mutex_unlock;
-  unsigned int rtld_lock_count = GL(dl_load_lock).mutex.__data.__count;
-  GL(dl_load_lock).mutex.__data.__count = 0;
-  while (rtld_lock_count-- > 0)
-    __pthread_mutex_lock (&GL(dl_load_lock).mutex);
-
   GL(dl_make_stack_executable_hook) = &__make_stacks_executable;
 #endif
 
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 1b064c5894..6d590d1335 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -403,7 +403,7 @@  struct rtld_global
   struct auditstate _dl_rtld_auditstate[DL_NNS];
 #endif
 
-#if defined SHARED && defined _LIBC_REENTRANT \
+#if !PTHREAD_IN_LIBC && defined SHARED \
     && defined __rtld_lock_default_lock_recursive
   EXTERN void (*_dl_rtld_lock_recursive) (void *);
   EXTERN void (*_dl_rtld_unlock_recursive) (void *);
@@ -1318,6 +1318,29 @@  link_map_audit_state (struct link_map *l, size_t index)
 }
 #endif /* SHARED */
 
+#if PTHREAD_IN_LIBC && defined SHARED
+/* Recursive locking implementation for use within the dynamic loader.
+   Used to define the __rtld_lock_lock_recursive and
+   __rtld_lock_unlock_recursive via <libc-lock.h>.  Initialized to a
+   no-op dummy implementation early.  Similar
+   to GL (dl_rtld_lock_recursive) and GL (dl_rtld_unlock_recursive)
+   in !PTHREAD_IN_LIBC builds.  */
+extern int (*___rtld_mutex_lock) (pthread_mutex_t *) attribute_hidden;
+extern int (*___rtld_mutex_unlock) (pthread_mutex_t *lock) attribute_hidden;
+
+/* Called after libc has been loaded, but before RELRO is activated.
+   Used to initialize the function pointers to the actual
+   implementations.  */
+void __rtld_mutex_init (void) attribute_hidden;
+#else /* !PTHREAD_IN_LIBC */
+static inline void
+__rtld_mutex_init (void)
+{
+  /* The initialization happens later (!PTHREAD_IN_LIBC) or is not
+     needed at all (!SHARED).  */
+}
+#endif /* !PTHREAD_IN_LIBC */
+
 #if THREAD_GSCOPE_IN_TCB
 void __thread_gscope_wait (void) attribute_hidden;
 # define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()
diff --git a/sysdeps/nptl/dl-mutex.c b/sysdeps/nptl/dl-mutex.c
new file mode 100644
index 0000000000..08b71dc21b
--- /dev/null
+++ b/sysdeps/nptl/dl-mutex.c
@@ -0,0 +1,53 @@ 
+/* Recursive locking implementation for the dynamic loader.  NPTL version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Use the mutex implementation in libc (assuming PTHREAD_IN_LIBC).  */
+
+#include <assert.h>
+#include <first-versions.h>
+#include <ldsodefs.h>
+
+__typeof (pthread_mutex_lock) *___rtld_mutex_lock attribute_relro;
+__typeof (pthread_mutex_unlock) *___rtld_mutex_unlock attribute_relro;
+
+void
+__rtld_mutex_init (void)
+{
+  /* There is an implicit assumption here that the lock counters are
+     zero and this function is called while nothing is locked.  For
+     early initialization of the mutex functions this is true because
+     it happens directly in dl_main in elf/rtld.c, and not some ELF
+     constructor while holding loader locks.  */
+
+  struct link_map *libc_map = GL (dl_ns)[LM_ID_BASE].libc_map;
+
+  const ElfW(Sym) *sym
+    = _dl_lookup_direct (libc_map, "pthread_mutex_lock",
+                         0x4f152227, /* dl_new_hash output.  */
+                         FIRST_VERSION_libc_pthread_mutex_lock_STRING,
+                         FIRST_VERSION_libc_pthread_mutex_lock_HASH);
+  assert (sym != NULL);
+  ___rtld_mutex_lock = DL_SYMBOL_ADDRESS (libc_map, sym);
+
+  sym = _dl_lookup_direct (libc_map, "pthread_mutex_unlock",
+                           0x7dd7aaaa, /* dl_new_hash output.  */
+                           FIRST_VERSION_libc_pthread_mutex_unlock_STRING,
+                           FIRST_VERSION_libc_pthread_mutex_unlock_HASH);
+  assert (sym != NULL);
+  ___rtld_mutex_unlock = DL_SYMBOL_ADDRESS (libc_map, sym);
+}
diff --git a/sysdeps/nptl/libc-lockP.h b/sysdeps/nptl/libc-lockP.h
index ae9691d40e..ec7b02bbdd 100644
--- a/sysdeps/nptl/libc-lockP.h
+++ b/sysdeps/nptl/libc-lockP.h
@@ -151,9 +151,6 @@  _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
   __libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)
 #endif
 
-#define __rtld_lock_trylock_recursive(NAME) \
-  __libc_maybe_call (__pthread_mutex_trylock, (&(NAME).mutex), 0)
-
 /* Unlock the named lock variable.  */
 #if IS_IN (libc) || IS_IN (libpthread)
 # define __libc_lock_unlock(NAME) \
@@ -163,19 +160,13 @@  _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
 #endif
 #define __libc_rwlock_unlock(NAME) __pthread_rwlock_unlock (&(NAME))
 
-#ifdef SHARED
-# define __rtld_lock_default_lock_recursive(lock) \
-  ++((pthread_mutex_t *)(lock))->__data.__count;
-
-# define __rtld_lock_default_unlock_recursive(lock) \
-  --((pthread_mutex_t *)(lock))->__data.__count;
-
+#if IS_IN (rtld)
 # define __rtld_lock_lock_recursive(NAME) \
-  GL(dl_rtld_lock_recursive) (&(NAME).mutex)
+  ___rtld_mutex_lock (&(NAME).mutex)
 
 # define __rtld_lock_unlock_recursive(NAME) \
-  GL(dl_rtld_unlock_recursive) (&(NAME).mutex)
-#else
+  ___rtld_mutex_unlock (&(NAME).mutex)
+#else /* Not in the dynamic loader.  */
 # define __rtld_lock_lock_recursive(NAME) \
   __pthread_mutex_lock (&(NAME).mutex)