x86: Use RTM intrinsics in pthread mutex lock elision

Message ID 20181001220831.7420-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86: Use RTM intrinsics in pthread mutex lock elision
Related show

Commit Message

H.J. Lu Oct. 1, 2018, 10:08 p.m.
Since RTM intrinsics are supported in GCC 4.9, we can use them in
pthread mutex lock elision.

	* sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
	Add -mrtm.
	(CFLAGS-elision-unlock.c): Likewise.
	(CFLAGS-elision-timed.c): Likewise.
	(CFLAGS-elision-trylock.c): Likewise.
	* sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
---
 sysdeps/unix/sysv/linux/x86/Makefile |  4 ++
 sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------
 2 files changed, 7 insertions(+), 67 deletions(-)

-- 
2.17.1

Comments

Adhemerval Zanella Oct. 2, 2018, 1:06 p.m. | #1
On 01/10/2018 19:08, H.J. Lu wrote:
> Since RTM intrinsics are supported in GCC 4.9, we can use them in

> pthread mutex lock elision.

> 

> 	* sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):

> 	Add -mrtm.

> 	(CFLAGS-elision-unlock.c): Likewise.

> 	(CFLAGS-elision-timed.c): Likewise.

> 	(CFLAGS-elision-trylock.c): Likewise.

> 	* sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.


LGTM, thanks.

> ---

>  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++

>  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------

>  2 files changed, 7 insertions(+), 67 deletions(-)

> 

> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile

> index 7dc4e61756..02ca36c6d2 100644

> --- a/sysdeps/unix/sysv/linux/x86/Makefile

> +++ b/sysdeps/unix/sysv/linux/x86/Makefile

> @@ -14,6 +14,10 @@ endif

>  ifeq ($(subdir),nptl)

>  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \

>  			      elision-trylock

> +CFLAGS-elision-lock.c += -mrtm

> +CFLAGS-elision-unlock.c += -mrtm

> +CFLAGS-elision-timed.c += -mrtm

> +CFLAGS-elision-trylock.c += -mrtm

>  endif

>  

>  ifeq ($(subdir),elf)

> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h

> index 4a7b9e3bf7..0449026839 100644

> --- a/sysdeps/unix/sysv/linux/x86/hle.h

> +++ b/sysdeps/unix/sysv/linux/x86/hle.h

> @@ -1,75 +1,11 @@

> -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers

> -   that do not support the intrinsics and instructions yet.  */

> +/* Shared RTM header.  */

>  #ifndef _HLE_H

>  #define _HLE_H 1

>  

> -#ifdef __ASSEMBLER__

> +#include <x86intrin.h>


Is it used in any configuration in assembly code?
H.J. Lu Oct. 2, 2018, 1:49 p.m. | #2
On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>

>

>

> On 01/10/2018 19:08, H.J. Lu wrote:

> > Since RTM intrinsics are supported in GCC 4.9, we can use them in

> > pthread mutex lock elision.

> >

> >       * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):

> >       Add -mrtm.

> >       (CFLAGS-elision-unlock.c): Likewise.

> >       (CFLAGS-elision-timed.c): Likewise.

> >       (CFLAGS-elision-trylock.c): Likewise.

> >       * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.

>

> LGTM, thanks.

>

> > ---

> >  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++

> >  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------

> >  2 files changed, 7 insertions(+), 67 deletions(-)

> >

> > diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile

> > index 7dc4e61756..02ca36c6d2 100644

> > --- a/sysdeps/unix/sysv/linux/x86/Makefile

> > +++ b/sysdeps/unix/sysv/linux/x86/Makefile

> > @@ -14,6 +14,10 @@ endif

> >  ifeq ($(subdir),nptl)

> >  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \

> >                             elision-trylock

> > +CFLAGS-elision-lock.c += -mrtm

> > +CFLAGS-elision-unlock.c += -mrtm

> > +CFLAGS-elision-timed.c += -mrtm

> > +CFLAGS-elision-trylock.c += -mrtm

> >  endif

> >

> >  ifeq ($(subdir),elf)

> > diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h

> > index 4a7b9e3bf7..0449026839 100644

> > --- a/sysdeps/unix/sysv/linux/x86/hle.h

> > +++ b/sysdeps/unix/sysv/linux/x86/hle.h

> > @@ -1,75 +1,11 @@

> > -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers

> > -   that do not support the intrinsics and instructions yet.  */

> > +/* Shared RTM header.  */

> >  #ifndef _HLE_H

> >  #define _HLE_H 1

> >

> > -#ifdef __ASSEMBLER__

> > +#include <x86intrin.h>

>

> Is it used in any configuration in assembly code?


No:

sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"
sysdeps/x86/elide.h:#include <hle.h>

BTW, elide.h isn't used anywhere.

-- 
H.J.
Adhemerval Zanella Oct. 2, 2018, 5:27 p.m. | #3
On 02/10/2018 10:49, H.J. Lu wrote:
> On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella

> <adhemerval.zanella@linaro.org> wrote:

>>

>>

>>

>> On 01/10/2018 19:08, H.J. Lu wrote:

>>> Since RTM intrinsics are supported in GCC 4.9, we can use them in

>>> pthread mutex lock elision.

>>>

>>>       * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):

>>>       Add -mrtm.

>>>       (CFLAGS-elision-unlock.c): Likewise.

>>>       (CFLAGS-elision-timed.c): Likewise.

>>>       (CFLAGS-elision-trylock.c): Likewise.

>>>       * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.

>>

>> LGTM, thanks.

>>

>>> ---

>>>  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++

>>>  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------

>>>  2 files changed, 7 insertions(+), 67 deletions(-)

>>>

>>> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile

>>> index 7dc4e61756..02ca36c6d2 100644

>>> --- a/sysdeps/unix/sysv/linux/x86/Makefile

>>> +++ b/sysdeps/unix/sysv/linux/x86/Makefile

>>> @@ -14,6 +14,10 @@ endif

>>>  ifeq ($(subdir),nptl)

>>>  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \

>>>                             elision-trylock

>>> +CFLAGS-elision-lock.c += -mrtm

>>> +CFLAGS-elision-unlock.c += -mrtm

>>> +CFLAGS-elision-timed.c += -mrtm

>>> +CFLAGS-elision-trylock.c += -mrtm

>>>  endif

>>>

>>>  ifeq ($(subdir),elf)

>>> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h

>>> index 4a7b9e3bf7..0449026839 100644

>>> --- a/sysdeps/unix/sysv/linux/x86/hle.h

>>> +++ b/sysdeps/unix/sysv/linux/x86/hle.h

>>> @@ -1,75 +1,11 @@

>>> -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers

>>> -   that do not support the intrinsics and instructions yet.  */

>>> +/* Shared RTM header.  */

>>>  #ifndef _HLE_H

>>>  #define _HLE_H 1

>>>

>>> -#ifdef __ASSEMBLER__

>>> +#include <x86intrin.h>

>>

>> Is it used in any configuration in assembly code?

> 

> No:

> 

> sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"

> sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"

> sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"

> sysdeps/x86/elide.h:#include <hle.h>

> 

> BTW, elide.h isn't used anywhere.

> 


It was used by HTM lock elision on pthread_rwlock_* before the new implementation
(cc25c8b4c1196a8c29e9a45b1e096b99a87b7f8c).  Andrew Senkevich has sent a 
patch [1] to re-enable HTM rwlock elision, but his own performance results
seem mixed.

I think we can safely remove all elide.h files now.

[1] https://sourceware.org/ml/libc-alpha/2017-04/msg00067.html

Patch

diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
index 7dc4e61756..02ca36c6d2 100644
--- a/sysdeps/unix/sysv/linux/x86/Makefile
+++ b/sysdeps/unix/sysv/linux/x86/Makefile
@@ -14,6 +14,10 @@  endif
 ifeq ($(subdir),nptl)
 libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
 			      elision-trylock
+CFLAGS-elision-lock.c += -mrtm
+CFLAGS-elision-unlock.c += -mrtm
+CFLAGS-elision-timed.c += -mrtm
+CFLAGS-elision-trylock.c += -mrtm
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
index 4a7b9e3bf7..0449026839 100644
--- a/sysdeps/unix/sysv/linux/x86/hle.h
+++ b/sysdeps/unix/sysv/linux/x86/hle.h
@@ -1,75 +1,11 @@ 
-/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers
-   that do not support the intrinsics and instructions yet.  */
+/* Shared RTM header.  */
 #ifndef _HLE_H
 #define _HLE_H 1
 
-#ifdef __ASSEMBLER__
+#include <x86intrin.h>
 
-.macro XBEGIN target
-	.byte 0xc7,0xf8
-	.long \target-1f
-1:
-.endm
-
-.macro XEND
-	.byte 0x0f,0x01,0xd5
-.endm
-
-.macro XABORT code
-	.byte 0xc6,0xf8,\code
-.endm
-
-.macro XTEST
-	 .byte 0x0f,0x01,0xd6
-.endm
-
-#endif
-
-/* Official RTM intrinsics interface matching gcc/icc, but works
-   on older gcc compatible compilers and binutils.
-   We should somehow detect if the compiler supports it, because
-   it may be able to generate slightly better code.  */
-
-#define _XBEGIN_STARTED		(~0u)
-#define _XABORT_EXPLICIT	(1 << 0)
-#define _XABORT_RETRY		(1 << 1)
-#define _XABORT_CONFLICT	(1 << 2)
-#define _XABORT_CAPACITY	(1 << 3)
-#define _XABORT_DEBUG		(1 << 4)
-#define _XABORT_NESTED		(1 << 5)
-#define _XABORT_CODE(x)		(((x) >> 24) & 0xff)
-
-#define _ABORT_LOCK_BUSY 	0xff
+#define _ABORT_LOCK_BUSY	0xff
 #define _ABORT_LOCK_IS_LOCKED	0xfe
 #define _ABORT_NESTED_TRYLOCK	0xfd
 
-#ifndef __ASSEMBLER__
-
-#define __force_inline __attribute__((__always_inline__)) inline
-
-static __force_inline int _xbegin(void)
-{
-  int ret = _XBEGIN_STARTED;
-  asm volatile (".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
-  return ret;
-}
-
-static __force_inline void _xend(void)
-{
-  asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory");
-}
-
-static __force_inline void _xabort(const unsigned int status)
-{
-  asm volatile (".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
-}
-
-static __force_inline int _xtest(void)
-{
-  unsigned char out;
-  asm volatile (".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory");
-  return out;
-}
-
-#endif
 #endif