V5 [PATCH 2/2] x86: Add a LD_PRELOAD IFUNC resolver test for CPU_FEATURE_USABLE

Message ID 20180927194327.7683-3-hjl.tools@gmail.com
State New
Headers show
Series
  • V5 [PATCH 2/2] x86: Add a LD_PRELOAD IFUNC resolver test for CPU_FEATURE_USABLE
Related show

Commit Message

H.J. Lu Sept. 27, 2018, 7:43 p.m.
Verify that CPU_FEATURE_USABLE can be used by IFUNC resolver in a
LD_PRELOAD library.

	* sysdeps/x86/Makefile (modules-names): Add tst-x86-platform-mod-4
	and tst-x86-platform-preload-4.
	(LDFLAGS-tst-x86-platform-mod-4.so): New.
	($(objpfx)tst-x86-platform-mod-4.so): Likewise.
	($(objpfx)tst-x86-platform-4): Likewise.
	($(objpfx)tst-x86-platform-4.out): Likewise.
	(tst-x86-platform-4-ENV): Likewise.
---
 sysdeps/x86/Makefile                     | 10 +++-
 sysdeps/x86/tst-x86-platform-4.c         | 54 +++++++++++++++++++++
 sysdeps/x86/tst-x86-platform-mod-4.c     | 37 ++++++++++++++
 sysdeps/x86/tst-x86-platform-preload-4.c | 62 ++++++++++++++++++++++++
 4 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/x86/tst-x86-platform-4.c
 create mode 100644 sysdeps/x86/tst-x86-platform-mod-4.c
 create mode 100644 sysdeps/x86/tst-x86-platform-preload-4.c

-- 
2.17.1

Comments

Florian Weimer Oct. 24, 2018, 9:12 a.m. | #1
* H. J. Lu:

> +static bool

> +ifunc_check_sse2 (void)

> +{

> +  return CPU_FEATURE_USABLE (SSE2);

> +}

> +

> +static bool

> +(*get_check_sse2 (void)) (void)

> +{

> +  return ifunc_check_sse2;

> +}

> +

> +bool check_sse2 (void) __attribute__((ifunc("get_check_sse2")));


The IFUNC resolver does not call CPU_FEATURE_USABLE, so this is not the
test I had in mind.

The patch below fixes this, and adds something that actually violates
the ordering constraints.  With these changes, I get this when building
glibc with --enable-bind-now:

elf/tst-x86-platform-4: Relink `./libc.so.6' with `elf/tst-x86-platform-preload-4.so' for IFUNC symbol `free'
Segmentation fault (core dumped)

I think using CPU_FEATURE_USABLE in a malloc implementation is something
that could be quite natural, so I think this really should be fixed in
some way.

Thanks,
Florian

diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 1e759d3efc..773b9137b3 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -22,6 +22,9 @@ $(objpfx)tst-x86-platform-mod-4.so: $(libsupport)
 $(objpfx)tst-x86-platform-4: $(objpfx)tst-x86-platform-mod-4.so
 $(objpfx)tst-x86-platform-4.out: $(objpfx)tst-x86-platform-preload-4.so
 tst-x86-platform-4-ENV = LD_PRELOAD=$(objpfx)tst-x86-platform-preload-4.so
+LDFLAGS-tst-x86-platform-preload-4.so = -Wl,-z,now
+LDFLAGS-tst-x86-platform-4.so = -Wl,-z,now
+LDFLAGS-tst-x86-platform-4 = -Wl,-z,now
 endif
 
 ifeq ($(subdir),misc)
diff --git a/sysdeps/x86/tst-x86-platform-4.c b/sysdeps/x86/tst-x86-platform-4.c
index fe7bd59b82..fb4e86b566 100644
--- a/sysdeps/x86/tst-x86-platform-4.c
+++ b/sysdeps/x86/tst-x86-platform-4.c
@@ -16,14 +16,18 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stdlib.h>
-#include <stdio.h>
 #include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xstdio.h>
 #include <sys/platform/x86.h>
 
 extern bool check_sse2 (void);
 extern bool check_avx (void);
 extern bool check_avx2 (void);
+extern int get_expected_free_variant (void);
+extern volatile int free_variant_called;
 
 static int
 do_test (void)
@@ -48,6 +52,22 @@ do_test (void)
       failed = true;
     }
 
+  /* Determine if the correct free function was selected.  */
+  int expected_variant = get_expected_free_variant ();
+  free (NULL);
+  TEST_COMPARE (expected_variant, free_variant_called);
+
+  /* Same for a free within libc.so.6.  */
+  {
+    FILE * fp = xfopen ("/dev/null", "r");
+    free_variant_called = 0;
+    xfclose (fp);
+    if (free_variant_called == 0)
+      puts ("warning: fclose did not call free");
+    else
+      TEST_COMPARE (expected_variant, free_variant_called);
+  }
+
   return failed ? EXIT_FAILURE : EXIT_SUCCESS;
 }
 
diff --git a/sysdeps/x86/tst-x86-platform-mod-4.c b/sysdeps/x86/tst-x86-platform-mod-4.c
index 789389ce3e..031634c615 100644
--- a/sysdeps/x86/tst-x86-platform-mod-4.c
+++ b/sysdeps/x86/tst-x86-platform-mod-4.c
@@ -35,3 +35,11 @@ check_avx2 (void)
 {
   return false;
 }
+
+int
+get_expected_free_variant (void)
+{
+  return -1;
+}
+
+volatile int free_variant_called;
diff --git a/sysdeps/x86/tst-x86-platform-preload-4.c b/sysdeps/x86/tst-x86-platform-preload-4.c
index d1216b90e9..6cb5b82377 100644
--- a/sysdeps/x86/tst-x86-platform-preload-4.c
+++ b/sysdeps/x86/tst-x86-platform-preload-4.c
@@ -18,45 +18,118 @@
 
 #include <stdbool.h>
 #include <sys/platform/x86.h>
+#include <stdlib.h>
 
 static bool
-ifunc_check_sse2 (void)
+true_function (void)
 {
-  return CPU_FEATURE_USABLE (SSE2);
+  return true;
 }
 
 static bool
-(*get_check_sse2 (void)) (void)
+false_function (void)
 {
-  return ifunc_check_sse2;
+  return false;
 }
 
-bool check_sse2 (void) __attribute__((ifunc("get_check_sse2")));
-
 static bool
-ifunc_check_avx (void)
+(*get_check_sse2 (void)) (void)
 {
-  return CPU_FEATURE_USABLE (AVX);
+  if (CPU_FEATURE_USABLE (SSE2))
+    return true_function;
+  else
+    return false_function;
 }
 
+bool check_sse2 (void) __attribute__ ((ifunc ("get_check_sse2")));
+
 static bool
 (*get_check_avx (void)) (void)
 {
-  return ifunc_check_avx;
+  if (CPU_FEATURE_USABLE (AVX))
+    return true_function;
+  else
+    return false_function;
 }
 
-bool check_avx (void) __attribute__((ifunc("get_check_avx")));
+bool check_avx (void) __attribute__ ((ifunc ("get_check_avx")));
 
 static bool
-ifunc_check_avx2 (void)
+(*get_check_avx2 (void)) (void)
 {
-  return CPU_FEATURE_USABLE (AVX2);
+  if (CPU_FEATURE_USABLE (AVX2))
+    return true_function;
+  else
+    return false_function;
 }
 
-static bool
-(*get_check_avx2 (void)) (void)
+bool check_avx2 (void) __attribute__ ((ifunc ("get_check_avx2")));
+
+/* The following is used to check what happens when the free function
+   in libc.so.6 is interposed.  This implementation simply does not
+   free any memory, to avoid the need for a complete malloc.  It
+   records the variant called in free_variant_called, so that it is
+   possible to check the selected implementation by calling the free
+   function.  */
+
+volatile int free_variant_called;
+
+void
+free_fallback (void *ignored)
+{
+  free_variant_called = 1;
+}
+
+void
+free_with_see (void *ignored)
+{
+  free_variant_called = 2;
+}
+
+void
+free_with_avx (void *ignored)
+{
+  free_variant_called = 3;
+}
+
+void
+free_with_avx2 (void *ignored)
+{
+  free_variant_called = 4;
+}
+
+void
+free_with_rtm (void *ignored)
+{
+  free_variant_called = 5;
+}
+
+int
+get_expected_free_variant (void)
+{
+  if (CPU_FEATURE_USABLE (RTM))
+    return 5;
+  if (CPU_FEATURE_USABLE (AVX2))
+    return 4;
+  if (CPU_FEATURE_USABLE (AVX))
+    return 3;
+  if (CPU_FEATURE_USABLE (SSE))
+    return 2;
+  return 1;
+}
+
+static __typeof__ (free) *
+get_free (void)
 {
-  return ifunc_check_avx2;
+  if (CPU_FEATURE_USABLE (RTM))
+    return free_with_rtm;
+  if (CPU_FEATURE_USABLE (AVX2))
+    return free_with_avx2;
+  if (CPU_FEATURE_USABLE (AVX))
+    return free_with_avx;
+  if (CPU_FEATURE_USABLE (SSE))
+    return free_with_see;
+  return free_fallback;
 }
 
-bool check_avx2 (void) __attribute__((ifunc("get_check_avx2")));
+void free (void *) __attribute__ ((ifunc ("get_free")));
H.J. Lu Oct. 24, 2018, 11 a.m. | #2
On 10/24/18, Florian Weimer <fweimer@redhat.com> wrote:
> * H. J. Lu:

>

>> +static bool

>> +ifunc_check_sse2 (void)

>> +{

>> +  return CPU_FEATURE_USABLE (SSE2);

>> +}

>> +

>> +static bool

>> +(*get_check_sse2 (void)) (void)

>> +{

>> +  return ifunc_check_sse2;

>> +}

>> +

>> +bool check_sse2 (void) __attribute__((ifunc("get_check_sse2")));

>

> The IFUNC resolver does not call CPU_FEATURE_USABLE, so this is not the

> test I had in mind.

>

> The patch below fixes this, and adds something that actually violates

> the ordering constraints.  With these changes, I get this when building

> glibc with --enable-bind-now:

>

> elf/tst-x86-platform-4: Relink `./libc.so.6' with

> `elf/tst-x86-platform-preload-4.so' for IFUNC symbol `free'

> Segmentation fault (core dumped)

>

> I think using CPU_FEATURE_USABLE in a malloc implementation is something

> that could be quite natural, so I think this really should be fixed in

> some way.

>

>

...

> +

> +static __typeof__ (free) *

> +get_free (void)

>  {

> -  return ifunc_check_avx2;

> +  if (CPU_FEATURE_USABLE (RTM))

> +    return free_with_rtm;

> +  if (CPU_FEATURE_USABLE (AVX2))

> +    return free_with_avx2;

> +  if (CPU_FEATURE_USABLE (AVX))

> +    return free_with_avx;

> +  if (CPU_FEATURE_USABLE (SSE))

> +    return free_with_see;

> +  return free_fallback;

>  }

>

> -bool check_avx2 (void) __attribute__((ifunc("get_check_avx2")));

> +void free (void *) __attribute__ ((ifunc ("get_free")));

>


I guess you knew that this issue was independent of my new functions.
You will get the same error regardless of what the get_free body has.


-- 
H.J.
Florian Weimer Oct. 24, 2018, 11:06 a.m. | #3
* H. J. Lu:

> I guess you knew that this issue was independent of my new functions.

> You will get the same error regardless of what the get_free body has.


Yes, the check is certainly overly conservative.  I thought we want to
remove it.  Don't we trigger it in glibc in a few places?  If the check
is gone, then I think we will see incorrect results from the new
interface.

I think we are very consistent right now when it comes to relocations in
IFUNC handlers.  I want to see this settled before adding something that
requires a relocation which is (among other things) targeted at IFUNC
resolvers.

Thanks,
Florian
H.J. Lu Oct. 24, 2018, 12:38 p.m. | #4
On 10/24/18, Florian Weimer <fweimer@redhat.com> wrote:
> * H. J. Lu:

>

>> I guess you knew that this issue was independent of my new functions.

>> You will get the same error regardless of what the get_free body has.

>

> Yes, the check is certainly overly conservative.  I thought we want to

> remove it.  Don't we trigger it in glibc in a few places?  If the check

> is gone, then I think we will see incorrect results from the new

> interface.

>

> I think we are very consistent right now when it comes to relocations in

> IFUNC handlers.  I want to see this settled before adding something that

> requires a relocation which is (among other things) targeted at IFUNC

> resolvers.

>


<sys/platform/x86.h> isn't targeted for IFUNC.   My first use is to add
x86_tsc_to_ns and x86_ns_to_tsc.   I am enclosing 2 patches here.

-- 
H.J.
From 0eda869020f2c77a9d3a850b9ef1d739039a340a Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 26 Jul 2018 12:45:18 -0700
Subject: [PATCH 1/2] x86: Add COMMON_CPUID_INDEX_[D_ECX_1|80000007|80000008]

Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and
COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them.

	* manual/platform.texi: Add entries for INVARIANT_TSC, WBNOINVD,
	XGETBV_ECX_1, XSAVEC, XSAVEOPT and XSAVES.
	* sysdeps/x86/cpu-features.c (get_extended_indeces): Also
	populate COMMON_CPUID_INDEX_80000007 and
	COMMON_CPUID_INDEX_80000008.
	(get_common_indeces): Also populate COMMON_CPUID_INDEX_D_ECX_1.
	Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
	XSAVEC is available.
	* sysdeps/x86/sys/platform/x86.h (bit_arch_XSAVEOPT_Usable): New.
	(bit_arch_XGETBV_ECX_1_Usable): Likewise.
	(bit_arch_XSAVES_Usable): Likewise.
	(bit_arch_INVARIANT_TSC_Usable): Likewise.
	(bit_arch_WBNOINVD_Usable): Likewise.
	(index_arch_XSAVEOPT_Usable): Likewise.
	(index_arch_XGETBV_ECX_1_Usable): Likewise.
	(index_arch_XSAVES_Usable): Likewise.
	(index_arch_INVARIANT_TSC_Usable): Likewise.
	(index_arch_WBNOINVD_Usable): Likewise.
	(need_arch_feature_XSAVEOPT): Likewise.
	(need_arch_feature_XSAVEC): Likewise.
	(need_arch_feature_XGETBV_ECX_1): Likewise.
	(need_arch_feature_XSAVES): Likewise.
	(need_arch_feature_INVARIANT_TSC): Likewise.
	(need_arch_feature_WBNOINVD): Likewise.
	(bit_cpu_XSAVEOPT): Likewise.
	(bit_cpu_XSAVEC): Likewise.
	(bit_cpu_XGETBV_ECX_1): Likewise.
	(bit_cpu_XSAVES): Likewise.
	(bit_cpu_INVARIANT_TSC): Likewise.
	(bit_cpu_WBNOINVD): Likewise.
	(index_cpu_XSAVEOPT): Likewise.
	(index_cpu_XSAVEC): Likewise.
	(index_cpu_XGETBV_ECX_1): Likewise.
	(index_cpu_XSAVES): Likewise.
	(index_cpu_INVARIANT_TSC): Likewise.
	(index_cpu_WBNOINVD): Likewise.
	(reg_XSAVEOPT): Likewise.
	(reg_XSAVEC): Likewise.
	(reg_XGETBV_ECX_1): Likewise.
	(reg_XSAVES): Likewise.
	(reg_INVARIANT_TSC): Likewise.
	(reg_WBNOINVD): Likewise.
	(COMMON_CPUID_INDEX_D_ECX_1): Likewise.
	(COMMON_CPUID_INDEX_80000007): Likewise.
	(COMMON_CPUID_INDEX_80000008): Likewise.
	* sysdeps/x86/tst-x86-platform-1.c (do_test): Also check
	XSAVEOPT, XSAVEC, XGETBV_ECX_1, XSAVES, INVARIANT_TSC and
	WBNOINVD.
---
 manual/platform.texi             | 20 ++++++++-
 sysdeps/x86/cpu-features.c       | 24 +++++++++--
 sysdeps/x86/sys/platform/x86.h   | 73 ++++++++++++++++++++++++++++++++
 sysdeps/x86/tst-x86-platform-1.c | 12 ++++++
 4 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/manual/platform.texi b/manual/platform.texi
index a2d9cc79fe..b0faa61a67 100644
--- a/manual/platform.texi
+++ b/manual/platform.texi
@@ -305,6 +305,9 @@ by @code{CPUID} instruction.  The available features are:
 @item
 @code{IBT} -- Intel Indirect Branch Tracking instruction extensions.
 
+@item
+@code{INVARIANT_TSC} -- Invariant TSC.
+
 @item
 @code{INVPCID} -- INVPCID instruction.
 
@@ -527,9 +530,15 @@ using a TSC deadline value.
 @item
 @code{WAITPKG} -- WAITPKG instruction extensions.
 
+@item
+@code{WBNOINVD} -- WBNOINVD instruction.
+
 @item
 @code{X2APIC} -- x2APIC.
 
+@item
+@code{XGETBV_ECX_1} -- XGETBV with ECX = 1.
+
 @item
 @code{XOP} -- XOP instruction extensions.
 
@@ -537,6 +546,15 @@ using a TSC deadline value.
 @code{XSAVE} -- The XSAVE/XRSTOR processor extended states feature, the
 XSETBV/XGETBV instructions, and XCR0.
 
+@item
+@code{XSAVEC} -- XSAVEC instruction.
+
+@item
+@code{XSAVEOPT} -- XSAVEOPT instruction.
+
+@item
+@code{XSAVES} -- XSAVES/XRSTORS instructions.
+
 @item
 @code{XTPRUPDCTRL} -- xTPR Update Control.
 
@@ -588,7 +606,7 @@ The supported features are:
 @item
 @code{TBM} @tab @code{TSC} @tab @code{VAES} @tab @code{VPCLMULQDQ}
 @item
-@code{XOP} @tab @code{XSAVE}
+@code{XOP} @tab @code{XSAVE} @tab @code{XSAVEC}
 @end multitable
 
 @end defmac
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 5f6ac6664c..f812f406fd 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -53,7 +53,18 @@ get_extended_indices (struct cpu_features *cpu_features)
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
-
+  if (eax >= 0x80000007)
+    __cpuid (0x80000007,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx);
+  if (eax >= 0x80000008)
+    __cpuid (0x80000008,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx);
 }
 
 static void
@@ -86,6 +97,13 @@ get_common_indices (struct cpu_features *cpu_features,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
 
+  if (cpu_features->max_cpuid >= 0xd)
+    __cpuid_count (0xd, 1,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx);
+
   /* Can we call xgetbv?  */
   if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
     {
@@ -219,10 +237,8 @@ get_common_indices (struct cpu_features *cpu_features,
 	      cpu_features->xsave_state_full_size
 		= xsave_state_full_size;
 
-	      __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
-
 	      /* Check if XSAVEC is available.  */
-	      if ((eax & (1 << 1)) != 0)
+	      if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
 		{
 		  unsigned int xstate_comp_offsets[32];
 		  unsigned int xstate_comp_sizes[32];
diff --git a/sysdeps/x86/sys/platform/x86.h b/sysdeps/x86/sys/platform/x86.h
index 48e42ae983..a2c5c7074d 100644
--- a/sysdeps/x86/sys/platform/x86.h
+++ b/sysdeps/x86/sys/platform/x86.h
@@ -38,6 +38,9 @@ enum
     COMMON_CPUID_INDEX_1 = 0,
     COMMON_CPUID_INDEX_7,
     COMMON_CPUID_INDEX_80000001,
+    COMMON_CPUID_INDEX_D_ECX_1,
+    COMMON_CPUID_INDEX_80000007,
+    COMMON_CPUID_INDEX_80000008,
     /* Keep the following line at the end.  */
     COMMON_CPUID_INDEX_MAX
   };
@@ -169,6 +172,11 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define bit_arch_TBM_Usable			(1u << 0)
 #define bit_arch_SYSCALL_SYSRET_Usable		(1u << 0)
 #define bit_arch_RDTSCP_Usable			(1u << 0)
+#define bit_arch_XSAVEOPT_Usable		(1u << 0)
+#define bit_arch_XGETBV_ECX_1_Usable		(1u << 0)
+#define bit_arch_XSAVES_Usable			(1u << 0)
+#define bit_arch_INVARIANT_TSC_Usable		(1u << 0)
+#define bit_arch_WBNOINVD_Usable		(1u << 0)
 
 /* Unused.  Compiler will optimize them out.  */
 #define index_arch_SSE3_Usable			FEATURE_INDEX_1
@@ -220,6 +228,11 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define index_arch_TBM_Usable			FEATURE_INDEX_1
 #define index_arch_SYSCALL_SYSRET_Usable	FEATURE_INDEX_1
 #define index_arch_RDTSCP_Usable		FEATURE_INDEX_1
+#define index_arch_XSAVEOPT_Usable		FEATURE_INDEX_1
+#define index_arch_XGETBV_ECX_1_Usable		FEATURE_INDEX_1
+#define index_arch_XSAVES_Usable		FEATURE_INDEX_1
+#define index_arch_INVARIANT_TSC_Usable		FEATURE_INDEX_1
+#define index_arch_WBNOINVD_Usable		FEATURE_INDEX_1
 
 /* COMMON_CPUID_INDEX_1.  */
 
@@ -309,6 +322,12 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define need_arch_feature_TBM			0
 #define need_arch_feature_SYSCALL_SYSRET	0
 #define need_arch_feature_RDTSCP		0
+#define need_arch_feature_XSAVEOPT		0
+#define need_arch_feature_XSAVEC		1
+#define need_arch_feature_XGETBV_ECX_1		0
+#define need_arch_feature_XSAVES		0
+#define need_arch_feature_INVARIANT_TSC		0
+#define need_arch_feature_WBNOINVD		0
 
 /* CPU features.  */
 
@@ -458,6 +477,24 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define bit_cpu_RDTSCP		(1u << 27)
 #define bit_cpu_LM		(1u << 29)
 
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define bit_cpu_XSAVEOPT	(1u << 0)
+#define bit_cpu_XSAVEC		(1u << 1)
+#define bit_cpu_XGETBV_ECX_1	(1u << 2)
+#define bit_cpu_XSAVES		(1u << 3)
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define bit_cpu_INVARIANT_TSC	(1u << 8)
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define bit_cpu_WBNOINVD	(1u << 9)
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -604,6 +641,24 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define index_cpu_RDTSCP	COMMON_CPUID_INDEX_80000001
 #define index_cpu_LM		COMMON_CPUID_INDEX_80000001
 
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define index_cpu_XSAVEOPT	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVEC	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XGETBV_ECX_1	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVES	COMMON_CPUID_INDEX_D_ECX_1
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define index_cpu_INVARIANT_TSC	COMMON_CPUID_INDEX_80000007
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define index_cpu_WBNOINVD	COMMON_CPUID_INDEX_80000008
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -750,6 +805,24 @@ extern unsigned int x86_get_arch_feature (unsigned int)
 #define reg_RDTSCP		edx
 #define reg_LM			edx
 
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define reg_XSAVEOPT		eax
+#define reg_XSAVEC		eax
+#define reg_XGETBV_ECX_1	eax
+#define reg_XSAVES		eax
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define reg_INVARIANT_TSC	edx
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define reg_WBNOINVD		ebx
+
 __END_DECLS
 
 #endif  /* _SYS_PLATFORM_X86_H */
diff --git a/sysdeps/x86/tst-x86-platform-1.c b/sysdeps/x86/tst-x86-platform-1.c
index 7c13d65ab1..56c2c8c8c1 100644
--- a/sysdeps/x86/tst-x86-platform-1.c
+++ b/sysdeps/x86/tst-x86-platform-1.c
@@ -173,6 +173,12 @@ do_test (void)
   CHECK_CPU_FEATURE (PAGE1GB);
   CHECK_CPU_FEATURE (RDTSCP);
   CHECK_CPU_FEATURE (LM);
+  CHECK_CPU_FEATURE (XSAVEOPT);
+  CHECK_CPU_FEATURE (XSAVEC);
+  CHECK_CPU_FEATURE (XGETBV_ECX_1);
+  CHECK_CPU_FEATURE (XSAVES);
+  CHECK_CPU_FEATURE (INVARIANT_TSC);
+  CHECK_CPU_FEATURE (WBNOINVD);
 
   printf ("Usable CPU features:\n");
   CHECK_CPU_FEATURE_USABLE (SSE3);
@@ -245,6 +251,12 @@ do_test (void)
   CHECK_CPU_FEATURE_USABLE (TBM);
   CHECK_CPU_FEATURE_USABLE (SYSCALL_SYSRET);
   CHECK_CPU_FEATURE_USABLE (RDTSCP);
+  CHECK_CPU_FEATURE_USABLE (XSAVEOPT);
+  CHECK_CPU_FEATURE_USABLE (XSAVEC);
+  CHECK_CPU_FEATURE_USABLE (XGETBV_ECX_1);
+  CHECK_CPU_FEATURE_USABLE (XSAVES);
+  CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
+  CHECK_CPU_FEATURE_USABLE (WBNOINVD);
 
   return 0;
 }
Florian Weimer Oct. 24, 2018, 12:43 p.m. | #5
* H. J. Lu:

> On 10/24/18, Florian Weimer <fweimer@redhat.com> wrote:

>> * H. J. Lu:

>>

>>> I guess you knew that this issue was independent of my new functions.

>>> You will get the same error regardless of what the get_free body has.

>>

>> Yes, the check is certainly overly conservative.  I thought we want to

>> remove it.  Don't we trigger it in glibc in a few places?  If the check

>> is gone, then I think we will see incorrect results from the new

>> interface.

>>

>> I think we are very consistent right now when it comes to relocations in

>> IFUNC handlers.  I want to see this settled before adding something that

>> requires a relocation which is (among other things) targeted at IFUNC

>> resolvers.

>>

>

> <sys/platform/x86.h> isn't targeted for IFUNC.   My first use is to add

> x86_tsc_to_ns and x86_ns_to_tsc.   I am enclosing 2 patches here.


That's a generic interface which should rely on internal CPU flags.
What's worse, the cached flag isn't updated by the kernel TSC watchdog,
so applications will use a known-broken time source.

What's wrong with clock_gettime?  It handles all these details.

Thanks,
Florian
H.J. Lu Oct. 24, 2018, 10:57 p.m. | #6
On 10/24/18, Florian Weimer <fweimer@redhat.com> wrote:
> * H. J. Lu:

>

>> On 10/24/18, Florian Weimer <fweimer@redhat.com> wrote:

>>> * H. J. Lu:

>>>

>>>> I guess you knew that this issue was independent of my new functions.

>>>> You will get the same error regardless of what the get_free body has.

>>>

>>> Yes, the check is certainly overly conservative.  I thought we want to

>>> remove it.  Don't we trigger it in glibc in a few places?  If the check

>>> is gone, then I think we will see incorrect results from the new

>>> interface.

>>>

>>> I think we are very consistent right now when it comes to relocations in

>>> IFUNC handlers.  I want to see this settled before adding something that

>>> requires a relocation which is (among other things) targeted at IFUNC

>>> resolvers.

>>>

>>

>> <sys/platform/x86.h> isn't targeted for IFUNC.   My first use is to add

>> x86_tsc_to_ns and x86_ns_to_tsc.   I am enclosing 2 patches here.

>

> That's a generic interface which should rely on internal CPU flags.

> What's worse, the cached flag isn't updated by the kernel TSC watchdog,

> so applications will use a known-broken time source.

>

> What's wrong with clock_gettime?  It handles all these details.

>


These are for cases where RDTSC/RDTSCP are preferred over
clock_gettime.


-- 
H.J.

Patch

diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 272d8f0b89..1e759d3efc 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -7,8 +7,9 @@  sysdep-dl-routines += dl-get-cpu-features
 
 tests += tst-get-cpu-features tst-get-cpu-features-static \
 	 tst-x86-platform-1 tst-x86-platform-1-static \
-	 tst-x86-platform-2 tst-x86-platform-3
-modules-names += tst-x86-platform-mod-2 tst-x86-platform-mod-3
+	 tst-x86-platform-2 tst-x86-platform-3 tst-x86-platform-4
+modules-names += tst-x86-platform-mod-2 tst-x86-platform-mod-3 \
+		 tst-x86-platform-mod-4 tst-x86-platform-preload-4
 tests-static += tst-get-cpu-features-static tst-x86-platform-1-static
 
 $(objpfx)tst-x86-platform-mod-2.so: $(libsupport)
@@ -16,6 +17,11 @@  $(objpfx)tst-x86-platform-2: $(objpfx)tst-x86-platform-mod-2.so
 LDFLAGS-tst-x86-platform-mod-3.so = -Wl,-z,now
 $(objpfx)tst-x86-platform-mod-3.so: $(libsupport)
 $(objpfx)tst-x86-platform-3: $(objpfx)tst-x86-platform-mod-3.so
+LDFLAGS-tst-x86-platform-mod-4.so = -Wl,-z,now
+$(objpfx)tst-x86-platform-mod-4.so: $(libsupport)
+$(objpfx)tst-x86-platform-4: $(objpfx)tst-x86-platform-mod-4.so
+$(objpfx)tst-x86-platform-4.out: $(objpfx)tst-x86-platform-preload-4.so
+tst-x86-platform-4-ENV = LD_PRELOAD=$(objpfx)tst-x86-platform-preload-4.so
 endif
 
 ifeq ($(subdir),misc)
diff --git a/sysdeps/x86/tst-x86-platform-4.c b/sysdeps/x86/tst-x86-platform-4.c
new file mode 100644
index 0000000000..fe7bd59b82
--- /dev/null
+++ b/sysdeps/x86/tst-x86-platform-4.c
@@ -0,0 +1,54 @@ 
+/* Test case for x86 <sys/platform/x86.h>.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/platform/x86.h>
+
+extern bool check_sse2 (void);
+extern bool check_avx (void);
+extern bool check_avx2 (void);
+
+static int
+do_test (void)
+{
+  bool failed = false;
+
+  if (CPU_FEATURE_USABLE (SSE2) != check_sse2 ())
+    {
+      printf ("LD_PRELOAD SSE2 check failed\n");
+      failed = true;
+    }
+
+  if (CPU_FEATURE_USABLE (AVX) != check_avx ())
+    {
+      printf ("LD_PRELOAD AVX check failed\n");
+      failed = true;
+    }
+
+  if (CPU_FEATURE_USABLE (AVX2) != check_avx2 ())
+    {
+      printf ("LD_PRELOAD AVX2 check failed\n");
+      failed = true;
+    }
+
+  return failed ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86/tst-x86-platform-mod-4.c b/sysdeps/x86/tst-x86-platform-mod-4.c
new file mode 100644
index 0000000000..789389ce3e
--- /dev/null
+++ b/sysdeps/x86/tst-x86-platform-mod-4.c
@@ -0,0 +1,37 @@ 
+/* Test case for x86 <sys/platform/x86.h>.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+
+bool
+check_sse2 (void)
+{
+  return false;
+}
+
+bool
+check_avx (void)
+{
+  return false;
+}
+
+bool
+check_avx2 (void)
+{
+  return false;
+}
diff --git a/sysdeps/x86/tst-x86-platform-preload-4.c b/sysdeps/x86/tst-x86-platform-preload-4.c
new file mode 100644
index 0000000000..d1216b90e9
--- /dev/null
+++ b/sysdeps/x86/tst-x86-platform-preload-4.c
@@ -0,0 +1,62 @@ 
+/* Test case for x86 <sys/platform/x86.h>.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <sys/platform/x86.h>
+
+static bool
+ifunc_check_sse2 (void)
+{
+  return CPU_FEATURE_USABLE (SSE2);
+}
+
+static bool
+(*get_check_sse2 (void)) (void)
+{
+  return ifunc_check_sse2;
+}
+
+bool check_sse2 (void) __attribute__((ifunc("get_check_sse2")));
+
+static bool
+ifunc_check_avx (void)
+{
+  return CPU_FEATURE_USABLE (AVX);
+}
+
+static bool
+(*get_check_avx (void)) (void)
+{
+  return ifunc_check_avx;
+}
+
+bool check_avx (void) __attribute__((ifunc("get_check_avx")));
+
+static bool
+ifunc_check_avx2 (void)
+{
+  return CPU_FEATURE_USABLE (AVX2);
+}
+
+static bool
+(*get_check_avx2 (void)) (void)
+{
+  return ifunc_check_avx2;
+}
+
+bool check_avx2 (void) __attribute__((ifunc("get_check_avx2")));