[v2,3/6] rs6000: Add support for SSE4.1 "ceil" intrinsics

Message ID 20210716135022.489455-4-pc@us.ibm.com
State New
Headers show
Series
  • rs6000: Add SSE4.1 "blend", "ceil", "floor"
Related show

Commit Message

apinski--- via Gcc-patches July 16, 2021, 1:50 p.m.
2021-07-16  Paul A. Clarke  <pc@us.ibm.com>

gcc
	* config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps,
	_mm_ceil_sd, _mm_ceil_ss): New.
---
v2: Improve formatting per review from Bill.

 gcc/config/rs6000/smmintrin.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

-- 
2.27.0

Comments

apinski--- via Gcc-patches July 16, 2021, 6:20 p.m. | #1
Hi Paul,

Thanks for the cleanup, LGTM!  Recommend maintainers approve.

Bill

On 7/16/21 8:50 AM, Paul A. Clarke wrote:
> 2021-07-16  Paul A. Clarke  <pc@us.ibm.com>

>

> gcc

> 	* config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps,

> 	_mm_ceil_sd, _mm_ceil_ss): New.

> ---

> v2: Improve formatting per review from Bill.

>

>   gcc/config/rs6000/smmintrin.h | 32 ++++++++++++++++++++++++++++++++

>   1 file changed, 32 insertions(+)

>

> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h

> index 69e54702a877..cad770a67631 100644

> --- a/gcc/config/rs6000/smmintrin.h

> +++ b/gcc/config/rs6000/smmintrin.h

> @@ -232,6 +232,38 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)

>     return any_ones * any_zeros;

>   }

>   

> +__inline __m128d

> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))

> +_mm_ceil_pd (__m128d __A)

> +{

> +  return (__m128d) vec_ceil ((__v2df) __A);

> +}

> +

> +__inline __m128

> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))

> +_mm_ceil_ps (__m128 __A)

> +{

> +  return (__m128) vec_ceil ((__v4sf) __A);

> +}

> +

> +__inline __m128d

> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))

> +_mm_ceil_sd (__m128d __A, __m128d __B)

> +{

> +  __v2df r = vec_ceil ((__v2df) __B);

> +  r[1] = ((__v2df) __A)[1];

> +  return (__m128d) r;

> +}

> +

> +__inline __m128

> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))

> +_mm_ceil_ss (__m128 __A, __m128 __B)

> +{

> +  __v4sf r = (__v4sf) __A;

> +  r[0] = __builtin_ceil (((__v4sf) __B)[0]);

> +  return r;

> +}

> +

>   /* Return horizontal packed word minimum and its index in bits [15:0]

>      and bits [18:16] respectively.  */

>   __inline __m128i
Segher Boessenkool July 28, 2021, 10:01 p.m. | #2
Hi!

On Fri, Jul 16, 2021 at 08:50:19AM -0500, Paul A. Clarke wrote:
> 	* config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps,

> 	_mm_ceil_sd, _mm_ceil_ss): New.


This is fine.  Thanks!


Segher

Patch

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 69e54702a877..cad770a67631 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -232,6 +232,46 @@  _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
   return any_ones * any_zeros;
 }
 
+/* Round each double-precision element of __A up to an integral value
+   (toward +infinity) and return the resulting vector.  */
+__inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ceil_pd (__m128d __A)
+{
+  return (__m128d) vec_ceil ((__v2df) __A);
+}
+
+/* Round each single-precision element of __A up to an integral value
+   (toward +infinity) and return the resulting vector.  */
+__inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ceil_ps (__m128 __A)
+{
+  return (__m128) vec_ceil ((__v4sf) __A);
+}
+
+/* Return a vector whose element 0 is element 0 of __B rounded up to an
+   integral value and whose element 1 is copied unchanged from __A.  */
+__inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ceil_sd (__m128d __A, __m128d __B)
+{
+  __v2df __r = vec_ceil ((__v2df) __B);
+  __r[1] = ((__v2df) __A)[1];
+  return (__m128d) __r;
+}
+
+/* Return a copy of __A in which element 0 has been replaced by element 0
+   of __B rounded up to an integral value (single-precision ceiling).  */
+__inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ceil_ss (__m128 __A, __m128 __B)
+{
+  __v4sf __r = (__v4sf) __A;
+  __r[0] = __builtin_ceilf (((__v4sf) __B)[0]);
+  return (__m128) __r;
+}
+
 /* Return horizontal packed word minimum and its index in bits [15:0]
    and bits [18:16] respectively.  */
 __inline __m128i