[RFC] allow inline intrinsics for __ieee754_sqrt/f

Message ID 019101d40a16$a3ec7d10$ebc57730$@beniston.com
State New
Headers show
Series
  • [RFC] allow inline intrinsics for __ieee754_sqrt/f
Related show

Commit Message

Jon Beniston June 22, 2018, 10:49 a.m.
Hi,

Most functions in libm call __ieee754_sqrt when needing to perform a square
root. For most targets, this results in the s/w implementation in
math/e_sqrt.c being used, even if the target has a h/w sqrt instruction.
There are some targets that have machine-specific implementations in
machine/*/, but even if that is a single instruction, the code typically
doesn't get inlined.

The following patch is one possible way to allow a sqrt instruction to be
used and for the calls to be inlined. I've just done this for x86/arm for
now. I've put this in include/machine/ieeefp.h, rather than fdlibm.h, as
that's where most of the other target specific code seems to be.

Not sure if using the __IEEE754_INLINE_SQRT* macros is the best way to
prevent redefinition errors. Perhaps someone has a better idea?

Cheers,
Jon

Comments

Corinna Vinschen June 25, 2018, 11:41 a.m. | #1
On Jun 22 11:49, Jon Beniston wrote:
> Hi,

> 

> Most functions in libm call __ieee754_sqrt when needing to perform a square

> root. For most targets, this results in the s/w implementation in

> math/e_sqrt.c being used, even if the target has a h/w sqrt instruction.

> There are some targets that have machine specific implementations in

> machine/*/, but even if a single instruction, that code typically doesn't

> get inlined.

> 

> The following patch is one possible way to allow a sqrt instruction to be

> used and for the calls to be inlined. I've just done this for x86/arm for

> now. I've put this in include/machine/ieeefp.h, rather than fdlibm.h, as

> that's where most of the other target specific code seems to be.

> 

> Not sure if using the __IEEE754_INLINE_SQRT* macros is the best way to

> prevent redefinition errors. Perhaps someone has a better idea?

> 

> Cheers,

> Jon

> 

> diff --git a/newlib/libc/include/machine/ieeefp.h

> [...]

>  #ifdef __i386__

>  #define __IEEE_LITTLE_ENDIAN

> +

> +#define __IEEE754_INLINE_SQRT

> +static inline double

> +__ieee754_sqrt (double x)

> +{

> +  double result;

> +  __asm__ ("fsqrt" : "=t" (result) : "0" (x));

> +  return result;

> +}

> +

> +#define __IEEE754_INLINE_SQRTF

> +static inline float

> +__ieee754_sqrtf (float x)

> +{

> +  float result;

> +  __asm__ ("fsqrt" : "=t" (result) : "0" (x));

> +  return result;

> +}

> +

>  #endif


Can we really imply that an i386 always comes with a FPU?


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Jon Beniston June 25, 2018, 1:04 p.m. | #2
Hi Corinna,

>Can we really imply that an i386 always comes with a FPU?


Looking through gcc's i386-c.c, we could use #ifndef _SOFT_FLOAT around that.

Cheers,
Jon
Hans-Bernhard Bröker June 26, 2018, 5:01 p.m. | #3
Am 25.06.2018 um 13:41 schrieb Corinna Vinschen:

> Can we really imply that an i386 always comes with a FPU?


Given that that would be patently untrue, I'm pretty sure we can't.

Strictly speaking one couldn't even imply it for i486, "thanks" to the 
486SX (and its incredibly silly optional companion, the 487SX).
Jon Beniston Aug. 17, 2018, 12:05 p.m. | #4
>Can we really imply that an i386 always comes with a FPU?


Here's an updated patch that doesn't inline sqrt on i386 if _SOFT_FLOAT is defined (this is what GCC seems to define to indicate that the h/w FPU is not available).

Cheers,
Jon

Patch

diff --git a/newlib/libc/include/machine/ieeefp.h b/newlib/libc/include/machine/ieeefp.h
index 2fb2268ce..e917d74b0 100644
--- a/newlib/libc/include/machine/ieeefp.h
+++ b/newlib/libc/include/machine/ieeefp.h
@@ -87,6 +87,39 @@ 
 #  define __IEEE_BYTES_LITTLE_ENDIAN
 # endif
 #endif
+
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) /* h/w f64 sqrt usable: inline it */
+#define __IEEE754_INLINE_SQRT
+static inline double
+__ieee754_sqrt(double x)
+{
+  double result;
+#if __ARM_ARCH >= 6
+  __asm__ ("vsqrt.f64 %P0, %P1" : "=w" (result) : "w" (x));
+#else
+  /* VFP9 Erratum 760019: "=&w" (early-clobber) keeps the result register distinct from the input; see GCC sources "gcc/config/arm/vfp.md" */
+  __asm__ ("vsqrt.f64 %P0, %P1" : "=&w" (result) : "w" (x));
+#endif /* __ARM_ARCH >= 6 */
+  return result;
+}
+#endif /* (__ARM_FP & 0x8) && !__SOFTFP__ */
+
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) /* h/w f32 sqrt usable: inline it */
+#define __IEEE754_INLINE_SQRTF
+static inline float
+__ieee754_sqrtf(float x)
+{
+  float result;
+#if __ARM_ARCH >= 6
+  __asm__ ("vsqrt.f32 %0, %1" : "=w" (result) : "w" (x));
+#else
+  /* VFP9 Erratum 760019: "=&w" (early-clobber) keeps the result register distinct from the input; see GCC sources "gcc/config/arm/vfp.md" */
+  __asm__ ("vsqrt.f32 %0, %1" : "=&w" (result) : "w" (x));
+#endif /* __ARM_ARCH >= 6 */
+  return result;
+}
+#endif /* (__ARM_FP & 0x4) && !__SOFTFP__ */
+
 #endif
 
 #if defined (__aarch64__)
@@ -189,6 +222,32 @@ 
 
 #ifdef __i386__
 #define __IEEE_LITTLE_ENDIAN
+
+/* Inline x87 square root.  GCC defines _SOFT_FLOAT when the h/w FPU is
+   not available; in that case keep using the s/w __ieee754_sqrt/f.  */
+#ifndef _SOFT_FLOAT
+
+#define __IEEE754_INLINE_SQRT
+static inline double
+__ieee754_sqrt (double x)
+{
+  double result;
+  /* "t" is the top of the x87 stack; fsqrt replaces it in place.  */
+  __asm__ ("fsqrt" : "=t" (result) : "0" (x));
+  return result;
+}
+
+#define __IEEE754_INLINE_SQRTF
+static inline float
+__ieee754_sqrtf (float x)
+{
+  float result;
+  __asm__ ("fsqrt" : "=t" (result) : "0" (x));
+  return result;
+}
+
+#endif /* !_SOFT_FLOAT */
+
 #endif
 
 #ifdef __riscv
diff --git a/newlib/libm/common/fdlibm.h b/newlib/libm/common/fdlibm.h
index 4523e8b2a..7eccce2b6 100644
--- a/newlib/libm/common/fdlibm.h
+++ b/newlib/libm/common/fdlibm.h
@@ -149,7 +149,9 @@  extern double significand __P((double));
 extern long double __ieee754_hypotl __P((long double, long double));
 
 /* ieee style elementary functions */
+#ifndef __IEEE754_INLINE_SQRT /* else a static inline comes from <machine/ieeefp.h> */
 extern double __ieee754_sqrt __P((double));			
+#endif /* !__IEEE754_INLINE_SQRT */
 extern double __ieee754_acos __P((double));			
 extern double __ieee754_acosh __P((double));			
 extern double __ieee754_log __P((double));			
@@ -195,7 +197,9 @@  extern float scalbf __P((float, float));
 extern float significandf __P((float));
 
 /* ieee style elementary float functions */
+#ifndef __IEEE754_INLINE_SQRTF /* else a static inline comes from <machine/ieeefp.h> */
 extern float __ieee754_sqrtf __P((float));			
+#endif /* !__IEEE754_INLINE_SQRTF */
 extern float __ieee754_acosf __P((float));			
 extern float __ieee754_acoshf __P((float));			
 extern float __ieee754_logf __P((float));			
diff --git a/newlib/libm/machine/arm/e_sqrt.c b/newlib/libm/machine/arm/e_sqrt.c
index 6f3eb8301..8d50ae234 100644
--- a/newlib/libm/machine/arm/e_sqrt.c
+++ b/newlib/libm/machine/arm/e_sqrt.c
@@ -24,7 +24,9 @@ 
  * SUCH DAMAGE.
  */
 
-#if (__ARM_FP & 0x8) && !defined(__SOFTFP__)
+/* Pull in __IEEE754_INLINE_SQRT (if any) before it is tested below.  */
+#include <machine/ieeefp.h>
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRT)
 #include <math.h>
 
 double
diff --git a/newlib/libm/machine/arm/ef_sqrt.c b/newlib/libm/machine/arm/ef_sqrt.c
index 3a1ba6cb4..3d8fd1191 100644
--- a/newlib/libm/machine/arm/ef_sqrt.c
+++ b/newlib/libm/machine/arm/ef_sqrt.c
@@ -24,7 +24,9 @@ 
  * SUCH DAMAGE.
  */
 
-#if (__ARM_FP & 0x4) && !defined(__SOFTFP__)
+/* Pull in __IEEE754_INLINE_SQRTF (if any) before it is tested below.  */
+#include <machine/ieeefp.h>
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRTF)
 #include <math.h>
 
 float
diff --git a/newlib/libm/math/e_sqrt.c b/newlib/libm/math/e_sqrt.c
index 78fc52417..313ae972c 100644
--- a/newlib/libm/math/e_sqrt.c
+++ b/newlib/libm/math/e_sqrt.c
@@ -83,6 +83,8 @@ 
 
 #include "fdlibm.h"
 
+#ifndef __IEEE754_INLINE_SQRT
+
 #ifndef _DOUBLE_IS_32BITS
 
 #ifdef __STDC__
@@ -194,6 +196,8 @@  static	double	one	= 1.0, tiny=1.0e-300;
  
 #endif /* defined(_DOUBLE_IS_32BITS) */
 
+#endif /* __IEEE754_INLINE_SQRT */
+
 /*
 Other methods  (use floating-point arithmetic)
 -------------
diff --git a/newlib/libm/math/ef_sqrt.c b/newlib/libm/math/ef_sqrt.c
index 80e7f360e..9940bad32 100644
--- a/newlib/libm/math/ef_sqrt.c
+++ b/newlib/libm/math/ef_sqrt.c
@@ -15,6 +15,8 @@ 
 
 #include "fdlibm.h"
 
+#ifndef __IEEE754_INLINE_SQRTF
+
 #ifdef __STDC__
 static	const float	one	= 1.0, tiny=1.0e-30;
 #else
@@ -87,3 +89,5 @@  static	float	one	= 1.0, tiny=1.0e-30;
 	SET_FLOAT_WORD(z,ix);
 	return z;
 }
+
+#endif /* __IEEE754_INLINE_SQRTF */