[PATCH, rs6000] Optimization for vec_xl_sext

Message ID 9c39ac50-aee1-b50f-dfb3-badb6752e921@linux.ibm.com
State New
Headers show
Series
  • [PATCH, rs6000] Optimization for vec_xl_sext
Related show

Commit Message

Xionghu Luo via Gcc-patches Oct. 14, 2021, 6:17 a.m.
Hi,

   The patch optimizes the code generation for the vec_xl_sext builtin. All sign extensions are now done directly in VSX registers.

   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

   I refined the patch according to Bill's and David's advice. I have also attached patch.diff and the ChangeLog in case the indentation doesn't display correctly in the email body.


ChangeLog

2021-10-11 Haochen Gui <guihaoc@linux.ibm.com>


gcc/

* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

Modify the expansion for sign extension. All extensions are done

within VSX registers.


gcc/testsuite/

* gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff
2021-10-11 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
	Modify the expansion for sign extension. All extensions are done
	within VSX registers.

gcc/testsuite/
	* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
 
   if (sign_extend)
     {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
       rtx tiscratch = gen_reg_rtx (TImode);
 
       /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
 	return 0;
       emit_insn (pat);
 
-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
       if (icode == CODE_FOR_vsx_lxvrbx)
-	emit_insn (gen_extendqidi2 (discratch, scratch));
+	{
+	  temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+	}
       else if (icode == CODE_FOR_vsx_lxvrhx)
-	emit_insn (gen_extendhidi2 (discratch, scratch));
+	{
+	  temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+	}
       else if (icode == CODE_FOR_vsx_lxvrwx)
-	emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-	discratch = scratch;
+	{
+	  temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+	}
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+	discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+	gcc_unreachable ();
 
-      /* Emit the sign extension from DI (double) to TI (quad).  */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));
 
       return target;
     }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */

Comments

Xionghu Luo via Gcc-patches Oct. 14, 2021, 4:58 p.m. | #1
Hi Haochen,

The patch looks okay to me now.  Will defer to David for final call. :-)

Thanks!
Bill

On 10/14/21 1:17 AM, HAO CHEN GUI wrote:
> Hi,

>

>   The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly.

>

>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

>

>   I refined the patch according to Bill and David's advice. I put the patch.diff and ChangeLog in attachment also in case the indentation doesn't show correctly in email body.

>

>

> ChangeLog

>

> 2021-10-11 Haochen Gui <guihaoc@linux.ibm.com>

>

>

> gcc/

>

> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

>

> Modify the expansion for sign extension. All extensions are done

>

> within VSX registers.

>

>

> gcc/testsuite/

>

> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

>

> patch.diff

>

> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c

> index b4e13af4dc6..587e9fa2a2a 100644

> --- a/gcc/config/rs6000/rs6000-call.c

> +++ b/gcc/config/rs6000/rs6000-call.c

> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl

>

>    if (sign_extend)

>      {

> -      rtx discratch = gen_reg_rtx (DImode);

> +      rtx discratch = gen_reg_rtx (V2DImode);

>        rtx tiscratch = gen_reg_rtx (TImode);

>

>        /* Emit the lxvr*x insn.  */

> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl

>         return 0;

>        emit_insn (pat);

>

> -      /* Emit a sign extension from QI,HI,WI to double (DI).  */

> -      rtx scratch = gen_lowpart (smode, tiscratch);

> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */

> +      rtx temp1, temp2;

>        if (icode == CODE_FOR_vsx_lxvrbx)

> -       emit_insn (gen_extendqidi2 (discratch, scratch));

> +       {

> +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);

> +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));

> +       }

>        else if (icode == CODE_FOR_vsx_lxvrhx)

> -       emit_insn (gen_extendhidi2 (discratch, scratch));

> +       {

> +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);

> +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));

> +       }

>        else if (icode == CODE_FOR_vsx_lxvrwx)

> -       emit_insn (gen_extendsidi2 (discratch, scratch));

> -      /*  Assign discratch directly if scratch is already DI.  */

> -      if (icode == CODE_FOR_vsx_lxvrdx)

> -       discratch = scratch;

> +       {

> +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);

> +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));

> +       }

> +      else if (icode == CODE_FOR_vsx_lxvrdx)

> +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);

> +      else

> +       gcc_unreachable ();

>

> -      /* Emit the sign extension from DI (double) to TI (quad). */

> -      emit_insn (gen_extendditi2 (target, discratch));

> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */

> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);

> +      emit_insn (gen_extendditi2_vector (target, temp2));

>

>        return target;

>      }

> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c

> new file mode 100644

> index 00000000000..78e72ac5425

> --- /dev/null

> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c

> @@ -0,0 +1,35 @@

> +/* { dg-do compile } */

> +/* { dg-require-effective-target int128 } */

> +/* { dg-require-effective-target power10_ok } */

> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */

> +

> +#include <altivec.h>

> +

> +vector signed __int128

> +foo1 (signed long a, signed char *b)

> +{

> +  return vec_xl_sext (a, b);

> +}

> +

> +vector signed __int128

> +foo2 (signed long a, signed short *b)

> +{

> +  return vec_xl_sext (a, b);

> +}

> +

> +vector signed __int128

> +foo3 (signed long a, signed int *b)

> +{

> +  return vec_xl_sext (a, b);

> +}

> +

> +vector signed __int128

> +foo4 (signed long a, signed long *b)

> +{

> +  return vec_xl_sext (a, b);

> +}

> +

> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */

> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */

> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */

> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
Xionghu Luo via Gcc-patches Oct. 14, 2021, 6:53 p.m. | #2
On Thu, Oct 14, 2021 at 2:17 AM HAO CHEN GUI <guihaoc@linux.ibm.com> wrote:
>

> Hi,

>

>    The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly.

>

>    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

>

>    I refined the patch according to Bill and David's advice. I put the patch.diff and ChangeLog in attachment also in case the indentation doesn't show correctly in email body.

>

>

> ChangeLog

>

> 2021-10-11 Haochen Gui <guihaoc@linux.ibm.com>

>

>

> gcc/

>

> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

>

> Modify the expansion for sign extension. All extensions are done

>

> within VSX registers.

>

>

> gcc/testsuite/

>

> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.


This is okay.

Thanks, David
Xionghu Luo via Gcc-patches Oct. 19, 2021, 9:01 a.m. | #3
Committed as r12-4494. Thanks to all of you.

Gui Haochen

On 15/10/2021 上午 2:53, David Edelsohn wrote:
> On Thu, Oct 14, 2021 at 2:17 AM HAO CHEN GUI <guihaoc@linux.ibm.com> wrote:

>> Hi,

>>

>>     The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly.

>>

>>     Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

>>

>>     I refined the patch according to Bill and David's advice. I put the patch.diff and ChangeLog in attachment also in case the indentation doesn't show correctly in email body.

>>

>>

>> ChangeLog

>>

>> 2021-10-11 Haochen Gui <guihaoc@linux.ibm.com>

>>

>>

>> gcc/

>>

>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

>>

>> Modify the expansion for sign extension. All extensions are done

>>

>> within VSX registers.

>>

>>

>> gcc/testsuite/

>>

>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

> This is okay.

>

> Thanks, David

Patch

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@  altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl

    if (sign_extend)
      {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
        rtx tiscratch = gen_reg_rtx (TImode);

        /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@  altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
         return 0;
        emit_insn (pat);

-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
        if (icode == CODE_FOR_vsx_lxvrbx)
-       emit_insn (gen_extendqidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+       }
        else if (icode == CODE_FOR_vsx_lxvrhx)
-       emit_insn (gen_extendhidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+       }
        else if (icode == CODE_FOR_vsx_lxvrwx)
-       emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-       discratch = scratch;
+       {
+         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+       }
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+       gcc_unreachable ();

-      /* Emit the sign extension from DI (double) to TI (quad). */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));

        return target;
      }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */